directory = $directory; } $this->runFromCommandLine($_SERVER['argv']); }

    /**
     * Builds the Cassandra cluster handle and opens a session on the configured keyspace.
     *
     * Contact point, credentials and keyspace are read from static::$cassandraHost and the
     * CASSANDRA_* class constants. A failed connect is reported but not rethrown, so
     * $this->cassandra is left unset in that case.
     */
    public function init(): void
    {
        $this->_cluster = Cassandra::cluster()
            ->withContactPoints(static::$cassandraHost)
            ->withPort(9042)
            ->withCredentials(
                static::CASSANDRA_USERNAME,
                static::CASSANDRA_PASSWORD
            )
            ->build();

        if ($this->_cluster) {
            try {
                $this->cassandra = $this->_cluster->connect(static::CASSANDRA_KEYSPACE);
            } catch (Exception $e) {
                echo "err\n";
                // "err" alone cannot be diagnosed; surface the reason on stderr as well.
                fwrite(STDERR, 'Cassandra connect failed: ' . $e->getMessage() . PHP_EOL);
            }
        }
        // static::$bucketMagic = defined('CASSANDRA_BUCKET_MAGIC') ? (int) CASSANDRA_BUCKET_MAGIC : 4;
    }

    /**
     * Parses the command-line options and runs either the consistency check or the
     * CSV-driven deletion pass. Every path through this method ends in exit.
     *
     * Options: -h/--help, -d/--directory, -v/--version, -o/--output, -r/--remove, -s/--source.
     * The deletion pass runs only when both --remove and --source are given; otherwise
     * --directory and --version are required.
     *
     * @param array $arguments Unused; the options are read through getopt().
     */
    public function runFromCommandLine($arguments)
    {
        $shortOptions = "hd:v:o:r:s:";
        $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"];
        // getopt() returns false on failure; treat that the same as "no options given".
        $options = getopt($shortOptions, $longOptions) ?: [];

        if (count($options) == 0 || isset($options['h']) || isset($options['help'])) {
            $this->displayHelpMessage();
            exit;
        }

        // The long spelling wins over the short one when both are supplied.
        $pick = function (string $long, string $short) use ($options) {
            return $options[$long] ?? ($options[$short] ?? null);
        };

        $directory = $pick('directory', 'd');
        $schemaVersion = $pick('version', 'v');
        $source = $pick('source', 's');
        $remove = $pick('remove', 'r');
        $output = $pick('output', 'o');

        $deletionRequested = $remove && $source;

        // Without a complete remove/source pair the run falls through to the consistency
        // check, which must never start with a null directory (it would glob "/*").
        if (!$deletionRequested && ($directory === null || $schemaVersion === null)) {
            echo "Missing Attachment directory or schema version.\n";
            exit;
        }

        // getopt() yields strings, so compare strictly against the string forms.
        if ($schemaVersion && !in_array($schemaVersion, ['1', '2'], true)) {
            echo "Invalid schema version. Only versions 1 and 2 are supported.\n";
            exit;
        }

        // The CSV writers append the file name directly to this value, so it has to end
        // with a directory separator.
        $this->structured_directory = ($output == null) ? './' : rtrim($output, '/') . '/';

        static::$schemaVersion = (int) $schemaVersion;
        $this->directory = $directory;
        $this->retrived_csv = './result_from_physical_files.csv';

        if ($deletionRequested) {
            $this->processAttachmentDeletionCSV($remove, $source);
        } else {
            $this->checkConsistency('attachment_file_info');
        }
        exit;
    }

    /*
     * NOTE(review): this copy of the file is damaged here. Everything between the heredoc
     * opener and "init();" (the help text, the end of displayHelpMessage() and the
     * signature of checkConsistency()) is missing and must be restored from version
     * control. The remaining tokens are kept exactly as found.
     */
    private function displayHelpMessage() { $helpMessage = <<init(); $dbEntries = $this->getDbEntries($tableName); $fileEntries = $this->getFileEntries($this->directory); $this->process_files_in_directory($this->structured_directory);
        //$this->createCSVById($this->retrived_csv);
    }

    /**
     * Scans $directory for attachment files, skipping "-thumb1"/"-thumb2" variants, groups
     * them by the first two characters of the id part of the file name and writes one
     * physical_<group>.csv per group.
     */
    private function getFileEntries($directory)
    {
        $files = glob($directory . '/*');
        $entries = [];
        foreach ($files as $file) {
            if (is_file($file)) {
                $fileName = basename($file);
                // Skip files with specific suffixes
                if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) {
                    continue;
                }
                $dashParts = explode("-", $fileName, 2);
                $dotParts = explode(".", $fileName);
                if (count($dashParts) === 2) {
                    $clientId = $dashParts[0];
                    $id = $dashParts[1];
                    $fileParts = $dashParts;
                    $delimiter = '-';
                } elseif (count($dotParts) >= 2) {
                    $clientId = $dotParts[0];
                    $id = implode(".", array_slice($dotParts, 1));
                    $fileParts = $dotParts;
                    $delimiter = '.';
                } else {
                    // Handle cases where the file name does not contain either a dash or a dot
                    continue;
                }
                $filePath = $file;
                $size = filesize($filePath);
                // Use file modification time (filemtime()) instead of file creation time (filectime())
                $creationTime = date('Y-m-d H:i:s', filemtime($filePath));
                $dynamicValue = substr($id, 0, 2);
                $creationTime = str_replace('"', '', $creationTime);
                $entries[$dynamicValue][] = [
                    'client_id' => $clientId,
                    'file_path' => $filePath,
                    'file_name' => implode($delimiter, $fileParts),
                    'size' => $size,
                    'creation_time' => $creationTime,
                ];
            }
        }
        // create CSV file for each client ID's physical file entries
        foreach
($entries as $clientId => $clientEntries) {
            $this->createPhysicalFileCSV($clientId, $clientEntries);
        }
        return $entries;
    }

    /**
     * Writes physical_<clientId>.csv (columns id, size, creation_time) into the
     * structured directory.
     *
     * @param string $clientId Group key used in the file name.
     * @param array  $entries  Rows carrying 'file_name', 'size' and 'creation_time'.
     *
     * @throws RuntimeException When the CSV file cannot be opened for writing.
     */
    private function createPhysicalFileCSV($clientId, $entries)
    {
        $fileName = $this->structured_directory . "physical_" . $clientId . ".csv";
        $csvFile = fopen($fileName, 'w');
        if ($csvFile === false) {
            throw new RuntimeException("Cannot open '$fileName' for writing.");
        }

        fputcsv($csvFile, ['id', 'size', 'creation_time']);
        foreach ($entries as $entry) {
            fputcsv($csvFile, [$entry['file_name'], $entry['size'], $entry['creation_time']]);
        }
        fclose($csvFile);
    }

    /**
     * Writes cassandra_<clientId>.csv into the structured directory with the columns
     * id, size, creation_time, filename, bucket, client_id, attachment_id.
     *
     * @param string $clientId Group key used in the file name.
     * @param array  $entries  Rows as built by getDbEntries().
     *
     * @throws RuntimeException When the CSV file cannot be opened for writing.
     */
    private function createDBFileCSV($clientId, $entries)
    {
        $columns = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id'];
        $fileName = $this->structured_directory . "cassandra_" . (string) $clientId . ".csv";
        $csvFile = fopen($fileName, 'w');
        if ($csvFile === false) {
            throw new RuntimeException("Cannot open '$fileName' for writing.");
        }

        fputcsv($csvFile, $columns);
        foreach ($entries as $entry) {
            $rowData = [];
            foreach ($columns as $column) {
                $rowData[] = $entry[$column];
            }
            fputcsv($csvFile, $rowData);
        }
        fclose($csvFile);
    }

    /**
     * Reads every row of $tableName page by page, groups the rows by a two-character key
     * and writes one cassandra_<key>.csv per group.
     *
     * Rows whose id is purely numeric are keyed by their filename and grouped by the first
     * two characters following the first dot of the filename. All other rows are keyed by
     * "<client_id>-<id>" and grouped by the first two characters of the id.
     *
     * @param string $tableName Table to read.
     *
     * @return array Entries indexed as [groupKey][entryKey].
     */
    private function getDbEntries($tableName)
    {
        // Schema version 1 has no bucket column.
        $columns = 'client_id, id, size, filename, created_on';
        if ($this->schema_version() !== 1) {
            $columns .= ', bucket';
        }
        $query = "SELECT $columns FROM $tableName";

        $result = $this->cassandra->execute(
            $query,
            [
                'arguments' => [],
                'page_size' => static::DEFAULT_PAGE_SIZE
            ]
        );

        $entries = [];
        while ($result) {
            foreach ($result as $row) {
                if (preg_match('/^[0-9]+$/', (string) $row['id'])) {
                    $dotParts = explode(".", $row['filename'], 2);
                    // A filename without a dot has no second part; group it under ''.
                    $groupKey = (string) substr($dotParts[1] ?? '', 0, 2);
                    $entryKey = $row['filename'];
                    $entryId = $row['id'];
                } else {
                    $groupKey = substr($row['id'], 0, 2);
                    $entryKey = $row['client_id'] . '-' . $row['id'];
                    $entryId = $entryKey;
                }

                // NOTE(review): created_on is cast to int and treated as a Unix timestamp in
                // seconds, exactly as before -- confirm against the column type.
                $entries[$groupKey][$entryKey] = [
                    'id' => $entryId,
                    'size' => (string) $row['size'],
                    'creation_time' => date('Y-m-d H:i:s', (int) $row['created_on']),
                    'filename' => $row['filename'],
                    // The column is absent for schema version 1; '0' is a valid bucket and
                    // must not collapse to ''.
                    'bucket' => $row['bucket'] ?? '',
                    'client_id' => (string) $row['client_id'],
                    'attachment_id' => (string) $row['id'],
                ];
            }
            $result = $result->nextPage();
        }

        // One CSV file per group key.
        foreach ($entries as $clientId => $clientEntries) {
            $this->createDBFileCSV($clientId, $clientEntries);
        }
        return $entries;
    }

    /**
     * Looks up attachment_file_info rows for every line of a report CSV and writes the
     * result to data_by_id.csv, using the report's own header row as column list.
     *
     * The id comes from the 'migrated_files_id' column when present, otherwise from the
     * third column. The client id and (schema version 2) the bucket are taken from the
     * 'ClientId' and 'Bucket' columns written by generateCsvReportForPhysicalFiles(); rows
     * lacking them are skipped because the statement cannot be bound without them.
     *
     * @param string $csvFile Path of the report CSV to read.
     *
     * @throws RuntimeException When data_by_id.csv cannot be opened for writing.
     */
    private function createCSVById($csvFile)
    {
        $data = [];
        $fileData = array_map('str_getcsv', file($csvFile));
        $headers = array_shift($fileData);
        $old_migrated_data = array_search('migrated_files_id', $headers);
        $clientIdIndex = array_search('ClientId', $headers);
        $bucketIndex = array_search('Bucket', $headers);
        $needsBucket = $this->schema_version() == 2;

        $query = "SELECT * FROM attachment_file_info WHERE id = ? and client_id = ?";
        if ($needsBucket) {
            $query = $query . ' and bucket = ?';
        }
        $statement = $this->cassandra->prepare($query);

        foreach ($fileData as $fileRow) {
            if ($old_migrated_data !== false && isset($fileRow[$old_migrated_data])) {
                $id = $fileRow[$old_migrated_data];
            } else {
                $id = $fileRow[2] ?? null;
            }

            // Every bind marker needs a value; the statement used to be executed with the
            // id only, which cannot satisfy "client_id = ?" (and "bucket = ?").
            if ($id === null || $clientIdIndex === false || !isset($fileRow[$clientIdIndex])
                || $fileRow[$clientIdIndex] === '') {
                continue;
            }
            $arguments = ['id' => $id, 'client_id' => (int) $fileRow[$clientIdIndex]];
            if ($needsBucket) {
                if ($bucketIndex === false || !isset($fileRow[$bucketIndex])) {
                    continue;
                }
                $arguments['bucket'] = $fileRow[$bucketIndex];
            }

            $result = $this->cassandra->execute($statement, ['arguments' => $arguments]);
            foreach ($result as $dbRow) {
                $data[] = $dbRow;
            }
        }

        $fileName = "data_by_id.csv";
        $out = fopen($fileName, 'w');
        if ($out === false) {
            throw new RuntimeException("Cannot open '$fileName' for writing.");
        }
        fputcsv($out, $headers);
        foreach ($data as $entry) {
            $rowData = [];
            foreach ($headers as $field) {
                $rowData[] = isset($entry[$field]) ? $entry[$field] : '';
            }
            fputcsv($out, $rowData);
        }
        fclose($out);
    }

    /**
     * Reads a three-column CSV into [first column => [second column, third column]].
     *
     * Lines identical to the first line (the header) and lines that do not split into
     * exactly three comma-separated values are ignored. Double quotes are stripped from
     * the third value only.
     *
     * @param string $file Path of the CSV file.
     *
     * @return array
     */
    private function parseCSVFile($file)
    {
        $file_lines = explode("\n", file_get_contents($file));
        $header = $file_lines[0];
        $file_assoc = [];

        foreach ($file_lines as $line) {
            if ($line === $header) {
                continue;
            }
            $values = explode(",", $line);
            if (count($values) != 3) {
                continue;
            }
            $file_assoc[$values[0]] = [$values[1], str_replace('"', '', $values[2])];
        }
        return $file_assoc;
    }

    /**
     * Compares one physical entry with its counterpart from the Cassandra map.
     *
     * @param string $id                   Entry id.
     * @param array  $data                 Two values of the physical entry (possibly quoted).
     * @param array  $cassandra_file_assoc Map of id => two values.
     *
     * @return array|null A mismatch record ('id', 'file1' and, when the id exists on both
     *                    sides, 'file2'), or null when both values are identical.
     */
    private function compareFileEntries($id, $data, $cassandra_file_assoc)
    {
        $physical = [trim($data[0], '"'), trim($data[1], '"')];

        if (!isset($cassandra_file_assoc[$id])) {
            return [
                'id' => $id,
                'file1' => [$id, $physical[0], $physical[1]],
            ];
        }

        $cassandra = [
            trim($cassandra_file_assoc[$id][0], '"'),
            trim($cassandra_file_assoc[$id][1], '"'),
        ];
        if ($physical[0] === $cassandra[0] && $physical[1] === $cassandra[1]) {
            return null;
        }

        return [
            'id' => $id,
            'file1' => [$id, $physical[0], $physical[1]],
            'file2' => [$id, $cassandra[0], $cassandra[1]],
        ];
    }

    /**
     * Returns the content of $file split on "\n".
     *
     * @param string $file Path of the file to read.
     *
     * @return array
     */
    private function getFileLines($file)
    {
        return explode("\n", file_get_contents($file));
    }

    private function filterAndMapEntries($entries)
    {
        $filtered_entries =
array_filter($entries);
        $mapped_entries = array_map(function ($entry) {
            return $entry[0];
        }, $filtered_entries);
        return array_values($mapped_entries);
    }

    /**
     * Compares the physical_<key>.csv and cassandra_<key>.csv exports found in $dir and
     * writes the CSV and HTML reports.
     *
     * Physical files without a matching database row go to
     * result_from_cassandra_entries.csv / cassandra.html; database rows without a matching
     * physical file go to result_from_physical_files.csv / physical.html. When only one of
     * the two exports exists for a key, all of its rows are reported.
     *
     * @param string $dir Directory holding the per-key CSV exports.
     */
    private function process_files_in_directory($dir)
    {
        $physical_files = [];
        $cassandra_files = [];
        foreach (glob($dir . '/*.csv') ?: [] as $file) {
            $filename = basename($file, '.csv');
            $separator = strpos($filename, '_');
            if ($separator === false) {
                continue;
            }
            $csv_type = substr($filename, 0, $separator);
            $file_num = substr($filename, $separator + 1);
            if ($csv_type == 'physical') {
                $physical_files[$file_num] = $file;
            } elseif ($csv_type == 'cassandra') {
                $cassandra_files[$file_num] = $file;
            }
        }
        ksort($physical_files);
        ksort($cassandra_files);

        $missing_physical_files = [];
        $missing_cassandra_entries = [];
        $cassandra_entries = [];
        $physical_entries = [];

        foreach ($physical_files as $file_num => $physical_file) {
            $cassandra_file = $dir . '/cassandra_' . $file_num . '.csv';

            if (file_exists($cassandra_file)) {
                // Keep every mismatch. The previous code kept only the first one of each
                // file pair, silently dropping the rest from the reports.
                foreach ($this->compare_csv_files($physical_file, $cassandra_file) as $mismatch) {
                    $cassandra_entries[] = $mismatch;
                }
                foreach ($this->compare_csv_files($cassandra_file, $physical_file) as $mismatch) {
                    $physical_entries[] = $mismatch;
                }
                continue;
            }

            // No database export for this key: every physical file in it is unmatched.
            $file_assoc = $this->buildFileAssociation($this->getFileLines($physical_file));
            foreach ($file_assoc as $id => $data) {
                if (count($data) !== 2) {
                    // Physical exports have exactly three columns; anything else is malformed.
                    continue;
                }
                $missing_cassandra_entries[] = [
                    'id' => $id,
                    'file1' => [$id, $data[0], $data[1]],
                ];
            }
        }

        foreach ($cassandra_files as $file_num => $cassandra_file) {
            $physical_file = $dir . '/physical_' . $file_num . '.csv';
            if (file_exists($physical_file)) {
                continue;
            }

            // No physical export for this key: every database row in it is unmatched.
            $file_assoc = $this->buildFileAssociation($this->getFileLines($cassandra_file));
            foreach ($file_assoc as $id => $data) {
                $missing_physical_files[] = [
                    'id' => $id,
                    'file1' => [
                        $id,
                        $data[0],
                        $data[1],
                        $data[2] ?? null,
                        $data[3] ?? null,
                        $data[4] ?? null,
                        $data[5] ?? null,
                    ],
                ];
            }
        }

        $result_from_cassandra_entries = array_unique(
            array_merge($cassandra_entries, $missing_cassandra_entries),
            SORT_REGULAR
        );
        $result_from_physical_files = array_unique(
            array_merge($physical_entries, $missing_physical_files),
            SORT_REGULAR
        );

        $this->generateCsvReportForDbEntries($result_from_cassandra_entries, 'result_from_cassandra_entries.csv');
        $this->generateHtmlReport($result_from_cassandra_entries, 'cassandra.html');
        $this->generateCsvReportForPhysicalFiles($result_from_physical_files, 'result_from_physical_files.csv');
        $this->generateHtmlReport($result_from_physical_files, 'physical.html');
        // $this->compareCSVFilesTransform($this->structured_directory . 'cassandra_all_entries.csv', $this->structured_directory . 'result_from_cassandra_entries.csv', $this->structured_directory . 'final_file.csv');
        // $this->deletePhysicalFilesFromCsv('result_from_cassandra_entries.csv');
    }

    /**
     * Turns raw CSV lines into a map keyed by the first column.
     *
     * Lines identical to the first line (the header) and lines with fewer than three
     * comma-separated values (for example the empty line after the final newline) are
     * skipped. Three-value lines map to [second, third]; longer lines map to the second
     * through seventh values. Double quotes are stripped from the third value.
     *
     * @param array $file_lines Lines as returned by getFileLines().
     *
     * @return array
     */
    private function buildFileAssociation($file_lines)
    {
        $file_assoc = [];
        $header = reset($file_lines);

        foreach ($file_lines as $line) {
            if ($line === $header) {
                continue;
            }
            $values = explode(",", $line);
            $count = count($values);
            if ($count < 3) {
                // Checked before touching $values[2], which does not exist here.
                continue;
            }
            $values[2] = str_replace('"', '', $values[2]);

            if ($count == 3) {
                $file_assoc[$values[0]] = [$values[1], $values[2]];
            } else {
                $file_assoc[$values[0]] = [
                    $values[1],
                    $values[2],
                    $values[3],
                    $values[4] ?? null,
                    $values[5] ?? null,
                    $values[6] ?? null,
                ];
            }
        }
        return $file_assoc;
    }

    /**
     * Lists the rows of the first CSV that are missing from, or differ in size or
     * creation time from, the second CSV. Rows are matched on the 'id' column.
     *
     * Each result has 'id' and 'file1' => [id, size, creation_time, filename, client_id,
     * bucket, attachment_id] (null for columns the first file does not have). Differing
     * rows also carry 'file2' => [id, size, creation_time, filename].
     *
     * @param string $file1_path CSV whose rows are checked.
     * @param string $file2_path CSV used as reference.
     *
     * @return array
     */
    private function compare_csv_files($file1_path, $file2_path)
    {
        $readRows = function ($path) {
            $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
            return $lines === false ? [] : array_map('str_getcsv', $lines);
        };
        // array_search() returns false for an absent header; used as an index that would
        // silently read column 0, so absent columns are mapped to null instead.
        $cell = function (array $row, $index) {
            return ($index !== false && isset($row[$index])) ? $row[$index] : null;
        };

        $file1_data = $readRows($file1_path);
        $file2_data = $readRows($file2_path);
        $file1_headers = array_shift($file1_data) ?: [];
        $file2_headers = array_shift($file2_data) ?: [];

        $file1_columns = [];
        foreach (['id', 'size', 'creation_time', 'filename', 'client_id', 'bucket', 'attachment_id'] as $column) {
            $file1_columns[] = array_search($column, $file1_headers);
        }
        $file2_columns = [];
        foreach (['id', 'size', 'creation_time', 'filename'] as $column) {
            $file2_columns[] = array_search($column, $file2_headers);
        }

        $file1_assoc = [];
        foreach ($file1_data as $row) {
            $values = [];
            foreach ($file1_columns as $index) {
                $values[] = $cell($row, $index);
            }
            if ($values[0] === null) {
                continue;
            }
            $file1_assoc[$values[0]] = $values;
        }

        $file2_assoc = [];
        foreach ($file2_data as $row) {
            $values = [];
            foreach ($file2_columns as $index) {
                $values[] = $cell($row, $index);
            }
            if ($values[0] === null) {
                continue;
            }
            $file2_assoc[$values[0]] = $values;
        }

        $missing_entries = [];
        foreach ($file1_assoc as $id => $file1) {
            if (!isset($file2_assoc[$id])) {
                $missing_entries[] = [
                    'id' => $id,
                    'file1' => $file1,
                ];
                continue;
            }

            $file2 = $file2_assoc[$id];
            if ($file1[1] !== $file2[1] || $file1[2] !== $file2[2]) {
                $missing_entries[] = [
                    'id' => $id,
                    'file1' => $file1,
                    'file2' => $file2,
                ];
            }
        }
        return $missing_entries;
    }

    /**
     * Generates a CSV report of inconsistent files.
     *
     * Columns: File/Attachment, File path, File name, Thumb 1, Thumb 2, Size,
     * Creation Time, ClientId, Bucket, Id. For numeric ids the file name is taken from
     * the filename slot of 'file1' instead of the id.
     *
     * @param array  $inconsistentFiles An array of inconsistent files.
     * @param string $name              The filename to use for the report.
     *
     * @throws RuntimeException When the report cannot be opened for writing.
     */
    private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void
    {
        $fp = fopen($name, 'w');
        if ($fp === false) {
            throw new RuntimeException("Cannot open '$name' for writing.");
        }
        fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket' , 'Id']);

        foreach ($inconsistentFiles as $row) {
            $file1 = $row['file1'];

            // The old expression "dir . '/' . $id ? a : b" tested the concatenated string
            // (always truthy), so the file2 fallback could never be reached.
            $check_value = $row['id'];
            if ($check_value === null || $check_value === '') {
                $check_value = $row['file2'][0] ?? '';
            }
            if (is_numeric($row['id']) && isset($file1[3])) {
                $check_value = $file1[3];
            }
            $filePath = $this->directory . '/' . $check_value;

            $size = isset($file1[1]) ? (string) $file1[1] : filesize($filePath);
            $creationTime = isset($file1[2]) ? (string) $file1[2] : date('Y-m-d H:i:s', filectime($filePath));
            $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : '';
            $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : '';
            $clientId = isset($file1[4]) ? $file1[4] : '';
            $bucket = isset($file1[5]) ? $file1[5] : '';
            // Was guarded by isset($file1[5]) while reading index 6.
            $attachmentId = isset($file1[6]) ? $file1[6] : '';

            fputcsv($fp, [
                'Attachment',
                $filePath,
                $check_value,
                $thumb1,
                $thumb2,
                $size,
                $creationTime,
                $clientId,
                $bucket,
                $attachmentId,
            ]);
        }
        fclose($fp);
        chmod($name, 0666);
    }

    /**
     * Generates a CSV report of entries, using the entry id as both path and name.
     *
     * Columns: File/Attachment, Entry Path, Entry Name, Thumb 1, Thumb 2, Size,
     * Creation Time.
     *
     * @param array  $inconsistentFiles An array of inconsistent entries.
     * @param string $name              The filename to use for the report.
     *
     * @throws RuntimeException When the report cannot be opened for writing.
     */
    private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void
    {
        $fp = fopen($name, 'w');
        if ($fp === false) {
            throw new RuntimeException("Cannot open '$name' for writing.");
        }
        fputcsv($fp, ['File/Attachment', 'Entry Path', 'Entry Name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time']);

        foreach ($inconsistentFiles as $row) {
            $file1 = $row['file1'];
            $filePath = $file1[0] ? $file1[0] : $row['file2'][0];
            $size = isset($file1[1]) ? (string) $file1[1] : filesize($filePath);
            $creationTime = isset($file1[2]) ? (string) $file1[2] : date('Y-m-d H:i:s', filectime($filePath));
            $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : '';
            $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : '';

            fputcsv($fp, [
                'Attachment',
                $filePath,
                $file1[0],
                $thumb1,
                $thumb2,
                $size,
                $creationTime
            ]);
        }
        fclose($fp);
        chmod($name, 0666);
    }

    /**
     * Generates an HTML report of inconsistent files.
     *
     * @param array $inconsistentFiles An array of inconsistent files.
     * @param string $name The filename to use for the report.
     */
    private function generateHtmlReport(array $inconsistentFiles, string $name): void
    {
        $file = fopen($name, 'w');
        if (!$file) {
            throw new Exception('Failed to open the file for writing.');
        }
        $html = '';
        foreach ($inconsistentFiles as $row) {
            $filePath = $this->directory . '/' . $row['file1'][0];
            $size = isset($row['file1'][1]) ?
(string) $row['file1'][1] : filesize($filePath);
            $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath));
            $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : '';
            $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : '';
            // Write the properties to the HTML table
            /*
             * NOTE(review): the HTML markup of this method is missing from this copy of the
             * file (all tags inside the string literals are gone), so the report is written
             * without any table structure. Restore the literals from version control; the
             * remaining tokens are kept exactly as found.
             */
            $html .= '';
        }
        $html .= '
File/AttachmentFile pathFile nameThumb 1Thumb 2SizeCreation Time
Attachment' . htmlspecialchars($filePath) . '' . htmlspecialchars($row['file1'][0]) . '' . htmlspecialchars($thumb1) . '' . htmlspecialchars($thumb2) . '' . htmlspecialchars($size) . '' . htmlspecialchars($creationTime) . '
';
        fwrite($file, $html);
        fclose($file);
    }

    /**
     * Writes to $finalFile (columns id, size, creation_time) every row of $secondFile that
     * has an entry with the same id, size and creation time in $firstFile.
     *
     * $firstFile is matched on its 'id', 'size' and 'creation_time' columns. $secondFile is
     * matched on 'Entry Path' plus either 'size'/'creation_time' or the 'Size'/
     * 'Creation Time' spelling that generateCsvReportForDbEntries() writes.
     *
     * @param string $firstFile  Reference CSV.
     * @param string $secondFile CSV whose rows are filtered.
     * @param string $finalFile  Path of the CSV to create.
     *
     * @throws InvalidArgumentException When a required column is missing from an input.
     * @throws RuntimeException         When $finalFile cannot be opened for writing.
     */
    function compareCSVFilesTransform($firstFile, $secondFile, $finalFile)
    {
        // Read the first CSV file
        $firstData = array_map('str_getcsv', file($firstFile));
        $firstHeaders = array_shift($firstData);

        // Read the second CSV file
        $secondData = array_map('str_getcsv', file($secondFile));
        $secondHeaders = array_shift($secondData);

        // array_search() returns false for an unknown header, and false used as an array
        // index reads column 0. Resolve columns explicitly and fail loudly instead.
        $locate = function (array $headers, array $candidates, $file) {
            foreach ($candidates as $candidate) {
                $index = array_search($candidate, $headers, true);
                if ($index !== false) {
                    return $index;
                }
            }
            throw new InvalidArgumentException("Column '" . $candidates[0] . "' not found in $file.");
        };

        $firstIdIndex = $locate($firstHeaders, ['id'], $firstFile);
        $firstSizeIndex = $locate($firstHeaders, ['size'], $firstFile);
        $firstCreationTimeIndex = $locate($firstHeaders, ['creation_time'], $firstFile);
        $secondIdIndex = $locate($secondHeaders, ['Entry Path'], $secondFile);
        $secondSizeIndex = $locate($secondHeaders, ['size', 'Size'], $secondFile);
        $secondCreationTimeIndex = $locate($secondHeaders, ['creation_time', 'Creation Time'], $secondFile);

        // Index the first file by id
        $firstEntries = [];
        foreach ($firstData as $row) {
            $firstEntries[$row[$firstIdIndex]] = [
                'id' => $row[$firstIdIndex],
                'size' => $row[$firstSizeIndex],
                'creation_time' => $row[$firstCreationTimeIndex]
            ];
        }

        // Create the final result CSV file
        $finalCsvFile = fopen($finalFile, 'w');
        if ($finalCsvFile === false) {
            throw new RuntimeException("Cannot open '$finalFile' for writing.");
        }
        fputcsv($finalCsvFile, ['id', 'size', 'creation_time']);

        // Keep the rows of the second file that match the first file exactly
        foreach ($secondData as $row) {
            $secondId = $row[$secondIdIndex];
            $secondSize = $row[$secondSizeIndex];
            $secondCreationTime = $row[$secondCreationTimeIndex];

            if (
                isset($firstEntries[$secondId]) &&
                $firstEntries[$secondId]['size'] === $secondSize &&
                $firstEntries[$secondId]['creation_time'] === $secondCreationTime
            ) {
                fputcsv($finalCsvFile, [$secondId, $secondSize, $secondCreationTime]);
            }
        }
        fclose($finalCsvFile);
    }

    private function deletePhysicalFilesFromCsv(string $csvFile): void
    {
        $directory = $this->directory . '/';
        $entries = [];
        if (($handle = fopen($csvFile, 'r')) !== false) {
            // Collect the file paths to be deleted
            while (($data = fgetcsv($handle)) !== false) {
                // Skip the header row
                if ($data[0] === 'File/Attachment') {
                    continue;
                }
                $filePath = $directory . $data[1];
                $entries[] = $filePath;
            }
            fclose($handle);
        }
        // Prompt the user for confirmation before deleting the files
        echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL;
        echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL;
        echo "Do you want to delete the physical files listed in the CSV report? (yes/no): ";
        $confirmation = trim(fgets(STDIN));
        if (strtolower($confirmation) === 'yes') {
            // Create a log file to record the deleted files
            $logFile = 'deleted_files.log';
            $logHandle = fopen($logFile, 'a');
            // Delete the physical files
            foreach ($entries as $filePath) {
                if (file_exists($filePath)) {
                    // unlink($filePath);
                    echo "File deleted: $filePath" . PHP_EOL;
                    // Write the deleted file path to the log file
                    fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL);
                } else {
                    echo "File not found: $filePath" . PHP_EOL;
                }
            }
            fclose($logHandle);
            echo "Deletion completed. The list of deleted files is saved in '$logFile'." . PHP_EOL;
        } else {
            echo "Deletion of physical files aborted." .
PHP_EOL;
        }
    }

    /**
     * Tells whether $id has the 8-4-4-4-12 layout of a UUID, made of lowercase letters
     * and digits.
     *
     * @param string $id
     *
     * @return bool
     */
    private static function _is_uuid(string $id): bool
    {
        $regex = '/^[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}$/';
        // preg_match() returns int|false; compare so the declared bool is really returned.
        return preg_match($regex, $id) === 1;
    }

    /**
     * Check if the attachment string is a valid migrated attachment
     *
     * @param string $attachmentId
     *
     * @return bool
     */
    private static function _isValidMigratedAttachment(string $attachmentId = ''): bool
    {
        return str::sub($attachmentId, 0, 2) === OLD_ATTACHMENTS_PREFIX;
    }

    /* ATTACHMENT DELETION ===================================================================================================================== */

    /**
     * Returns cassandra schema version
     *
     * @return int
     */
    private function schema_version()
    {
        return static::$schemaVersion;
    }

    /**
     * Returns info(id,size) for attachment file
     *
     * The bucket takes part in the lookup for every schema version other than 1 and is
     * bound as a statement argument rather than spliced into the CQL text.
     *
     * @param int         $clientId
     * @param string|null $bucketId
     * @param string      $id
     *
     * @return object|null The first matching row as an object, or null when none matches.
     */
    private function get_info(int $clientId, ?string $bucketId, string $id): ?object
    {
        $cql = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?';
        $args = [
            'client_id' => $clientId,
            'id' => $id
        ];
        if ($this->schema_version() != 1) {
            $cql .= ' AND bucket = ?';
            // A null bucket used to be interpolated as ''; keep that meaning.
            $args['bucket'] = (string) $bucketId;
        }

        $res = $this->cassandra->execute(
            $this->cassandra->prepare($cql),
            [
                'arguments' => $args
            ]
        );
        if (!$res || !$res->valid()) {
            return null;
        }

        $tmp = $res->current();
        $attachment = (object) $tmp;
        $attachment->id = (string) $tmp['id'];
        if (array_key_exists('size', $tmp)) {
            $attachment->size = (int) $tmp['size'];
        }
        return $attachment;
    }

    /**
     * prepares 'bucket' field for partitioning
     *
     * @param string $id
     *
     * @return string The first static::$bucketMagic characters of $id.
     */
    private function _set_bucket(string $id): string
    {
        return substr($id, 0, static::$bucketMagic);
    }

    /**
     * updates attachment count and size
     *
     * @param bool $add  true to add one attachment of $size, false to subtract it.
     * @param int  $size
     *
     * @return void
     */
    private function _update_attachment_stats(bool $add, int $size): void
    {
        $op = $add ? '+' : '-';
        // Both statements now bind the client id as int; only the first one used to cast.
        $arguments = ['client_id' => (int) static::$clientId];

        $query = $this->cassandra->prepare('UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?');
        $this->cassandra->execute($query, ['arguments' => $arguments]);

        // $size is a declared int, so concatenating it into the statement is safe.
        $query = $this->cassandra->prepare(
            'UPDATE attachment_stats SET size = size ' . $op . ' ' . $size . ' where client_id = ?'
        );
        $this->cassandra->execute($query, ['arguments' => $arguments]);
    }

    /**
     * returns full attachment table key for given id
     *
     * @param string $id
     *
     * @return object|null
     */
    private function _get_attachment_key(string $id): ?object
    {
        $query = $this->cassandra->prepare('SELECT * from attachment_ids where id = ? AND bucket = ? AND client_id = ?');
        $arguments = [
            'client_id' => static::$clientId,
            'bucket' => $this->_set_bucket($id),
            'id' => $id,
        ];
        $data = $this->cassandra->execute($query, ['arguments' => $arguments]);

        if ($data && $data->valid()) {
            return (object) $data->current();
        }
        return null;
    }

    /**
     * returns single attachment data for provided ID
     *
     * The attachments row is addressed with the key stored in attachment_ids; schema
     * versions other than 1 additionally match on entity_id.
     *
     * @param string $attachmentId
     *
     * @return object|null null when the key or the attachment row does not exist.
     */
    private function _get_attachment_by_id(string $attachmentId): ?object
    {
        $attachment = null;
        try {
            $properties = '*';
            echo "attid: ". $attachmentId . PHP_EOL;
            $key = $this->_get_attachment_key($attachmentId);
            if ($key) {
                $cql = 'SELECT ' . $properties . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?';
                $arguments = [
                    'client_id' => static::$clientId,
                    'id' => $key->id,
                    'project_id' => $key->project_id,
                    'entity_type' => $key->entity_type,
                ];
                if ($this->schema_version() !== 1) {
                    $cql .= ' AND entity_id = ?';
                    $arguments['entity_id'] = $key->entity_id;
                }
                $attachment = $this->cassandra->execute(
                    $this->cassandra->prepare($cql),
                    ['arguments' => $arguments]
                );
            }
        } catch (Cassandra\Exception\InvalidArgumentException $e) {
            // Still treated as "not found", but no longer hidden from the operator.
            fwrite(STDERR, 'Attachment lookup failed: ' . $e->getMessage() . PHP_EOL);
        }

        return $attachment != null && $attachment->valid()
            ? $this->_convert_to_object($attachment->current())
            : null;
    }

    /**
     * deletes attachment_ids entry
     *
     * @param string $id
     */
    private function _delete_attachment_key(string $id): void
    {
        $result = null;
        $query = $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? 
AND client_id = ?');
        $arguments = [
            'client_id' => static::$clientId,
            'bucket' => $this->_set_bucket($id),
            'id' => $id,
        ];
        $this->cassandra->executeAsync($query, ['arguments' => $arguments]);
    }

    /**
     * updates attachment data references
     *
     * Adjusts ref_count in attachment_file_refs by one and, when $attachmentId is given,
     * inserts or deletes the matching attachment_file_ids row. Both statements are sent
     * with executeAsync().
     *
     * @param string $data_id
     * @param bool   $add          true to add a reference, false to remove one.
     * @param string $attachmentId
     *
     * @return void
     */
    private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void
    {
        $queryArguments = [
            'client_id' => static::$clientId,
            'bucket' => $this->_set_bucket($data_id),
            'id' => $data_id,
        ];
        $sign = $add ? '+' : '-';
        $query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . $sign . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?');
        $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]);

        if (empty($attachmentId)) {
            return;
        }

        $queryArguments['attachment_id'] = $attachmentId;
        $cql = $add
            ? 'INSERT INTO attachment_file_ids (client_id,bucket,id,attachment_id) VALUES(?,?,?,?)'
            : 'DELETE FROM attachment_file_ids WHERE client_id = ? AND bucket = ? AND attachment_id = ? AND id = ?';
        $this->cassandra->executeAsync($this->cassandra->prepare($cql), ['arguments' => $queryArguments]);
    }

    /**
     * Deletes attachment from Cassandra
     *
     * Removes the attachment_file_info row, its attachment_file_refs / attachment_file_ids
     * rows and every attachments row (with its attachment_ids key) that points at the
     * file, then the attachments row stored under $id itself.
     *
     * @param int         $clientId
     * @param string|null $bucketId Bucket of the attachment_file_info row (schema version 2).
     * @param string      $id
     *
     * @return bool false when no attachment_file_info row exists or no attachments row is
     *              stored under $id; true once that attachments row has been deleted.
     */
    public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool
    {
        static::$clientId = $clientId;

        $fileInfo = $this->get_info($clientId, $bucketId, $id);
        if (!$fileInfo) {
            return false;
        }
        // get_info() sets size only when the row has that column.
        $this->_update_attachment_stats(false, $fileInfo->size ?? 0);

        // The bucket is bound as an argument instead of being spliced into the CQL text.
        $infoArgs = [
            'client_id' => $clientId,
            'id' => $id,
        ];
        $q = 'DELETE FROM attachment_file_info WHERE id = ? AND client_id = ?';
        if ($this->schema_version() === 2) {
            $q .= ' AND bucket = ?';
            $infoArgs['bucket'] = (string) $bucketId;
        }
        $this->cassandra->execute($this->cassandra->prepare($q), ['arguments' => $infoArgs]);

        echo "DEL REFS\n";
        // The reference tables are partitioned by the bucket derived from the id.
        $refData = [
            'client_id' => $clientId,
            'id' => $id,
            'bucket' => $this->_set_bucket($id),
        ];
        $query = $this->cassandra->prepare(
            'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?'
        );
        $this->cassandra->execute($query, ['arguments' => $refData]);

        // get all attachments with deleted file and remove them
        $query = $this->cassandra->prepare(
            'SELECT attachment_id FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
        );
        $result = $this->cassandra->execute($query, ['arguments' => $refData]);

        echo "DEL ATTACHMENTS\n";
        $isSchemaV1 = $this->schema_version() === 1;
        $delCql = 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?';
        if (!$isSchemaV1) {
            $delCql .= ' AND entity_id = ?';
        }
        $delQuery = $this->cassandra->prepare($delCql);

        while ($result && $result->valid()) {
            $attachmentId = $result->current()['attachment_id'];
            $result->next();

            $key = $this->_get_attachment_key((string) $attachmentId);
            if ($key == null) {
                continue;
            }

            $delArgs = [
                'client_id' => static::$clientId,
                'project_id' => $key->project_id,
                'entity_type' => $key->entity_type,
                'id' => $attachmentId,
            ];
            if (!$isSchemaV1) {
                $delArgs['entity_id'] = $key->entity_id;
            }

            $this->_delete_attachment_key((string) $attachmentId);
            $this->cassandra->execute($delQuery, ['arguments' => $delArgs]);
        }

        $query = $this->cassandra->prepare(
            'DELETE FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
        );
        $this->cassandra->execute($query, ['arguments' => $refData]);

        $attachment = $this->_get_attachment_by_id($id);
        if (!$attachment) {
            return false;
        }

        $cql = 'DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_type = ? AND client_id = ?';
        $arguments = [
            'client_id' => static::$clientId,
            'id' => $id,
            'project_id' => $attachment->project_id,
            'entity_type' => $attachment->entity_type,
        ];
        if (!$isSchemaV1) {
            $cql .= ' AND entity_id = ?';
            $arguments['entity_id'] = $attachment->entity_id;
        }

        $deleted = $this->cassandra->execute($this->cassandra->prepare($cql), ['arguments' => $arguments]) != null;
        if (!$deleted) {
            return false;
        }

        $this->_update_file_refs($attachment->data_id, false, $attachment->id);
        $this->_delete_attachment_key($id);
        return true;
    }

    /**
     * Deletes attachments provided in file
     *
     * This is currently a dry run: matching rows are announced and written to
     * deleted_files.log, but nothing is removed. The first line of the file is always
     * treated as a header, and rows with fewer than seven columns are ignored.
     *
     * @param string $file CSV report to process.
     * @param string $src  ('cassandra'/'file') Which kind of row to act on.
     *
     * @return void
     *
     * @throws RuntimeException When deleted_files.log cannot be opened.
     *
     * example file:
     *
     * cassandra,/test1/testx,testx,testx-thumb1,testx-thumb2,4343,20-02-22 13:30,1,a,abc-def
     * cassandra,/test1/testx1,testx1,testx1-thumb1,testx1-thumb2,4343,20-02-22 13:30,1,1,1abc-def
     * file,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,,
     * cassandra,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def
     */
    public function processAttachmentDeletionCSV(string $file, string $src) : void
    {
        echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL;
        echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL;
        echo "Do you want to delete the physical files listed in the CSV report? (yes/no): ";
        // fgets() returns false on EOF; cast so trim() always receives a string.
        $confirmation = trim((string) fgets(STDIN));
        if (strtolower($confirmation) !== 'yes') {
            return;
        }

        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            echo "Cannot read '$file'." . PHP_EOL;
            return;
        }

        $logFile = 'deleted_files.log';
        $logHandle = fopen($logFile, 'a');
        if ($logHandle === false) {
            throw new RuntimeException("Cannot open '$logFile' for appending.");
        }
        $directory = $this->directory . '/';

        // The first line is the header row.
        array_shift($lines);
        foreach ($lines as $line) {
            // The reports are written with fputcsv(), so quoted fields must be parsed as CSV.
            $values = str_getcsv($line);
            if ($values[0] === 'File/Attachment' || count($values) < 7) {
                continue;
            }

            $data = (object) [
                'source' => $values[0],
                'path' => $values[1],
                'name' => $values[2],
                'thumb1' => $values[3],
                'thumb2' => $values[4],
                'size' => $values[5],
                'created' => $values[6],
            ];
            if (count($values) >= 10) {
                $data->clientId = (int) $values[7];
                $data->bucket = $values[8];
                $data->id = $values[9];
            }

            if ($data->source === 'Attachment' && $src === 'cassandra') {
                if (!isset($data->id)) {
                    // Client id, bucket and id are only present on ten-column rows.
                    echo "Skipping incomplete attachment row: $line" . PHP_EOL;
                    continue;
                }
                echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL;
                fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL);
            } elseif ($data->source === 'File' && $src === 'file') {
                // This branch used to test an undefined $source variable and never ran.
                $filePath = $directory . $data->path;
                if (file_exists($filePath)) {
                    // Deletion is deliberately disabled (dry run). To enable it:
                    // unlink($filePath);
                    // unlink($directory . $data->thumb1);
                    // unlink($directory . $data->thumb2);
                    echo "File deleted: $filePath" . PHP_EOL;
                    // Write the deleted file path to the log file
                    fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL);
                } else {
                    echo "File not found: $filePath" . PHP_EOL;
                }
            }
        }

        fclose($logHandle);
        echo "Deletion completed. The list of attachments is saved in '$logFile'." . PHP_EOL;
    }
}

#$options = getopt('', ['directory:']);
$checker = new DataConsistencyChecker();
$checker->checkConsistency("attachment_file_info", true);
#$checker::$cassandraHost = 'localhost';
#$checker->init($options);
#$checker->deleteAttachment("1", "f", "ff29ead0-8696-4ef1-8120-538d6dd7efd1");
#$checker->processAttachmentDeletionCSV("todelete.csv", "file");