directory = $directory;
        }
        $this->runFromCommandLine($_SERVER['argv']);
    }

    /**
     * Initializes the Cassandra connection based on the configuration settings.
     *
     * Reads the CASSANDRA section of config.ini (host, port, user, password and
     * keyspace), builds the cluster and stores the connected session in
     * $this->cassandra. A failed connect is reported on stdout and leaves
     * $this->cassandra unset.
     *
     * @return void
     * @throws Exception When config.ini cannot be parsed or has no CASSANDRA section.
     */
    public function init(): void
    {
        $config = parse_ini_file('config.ini', true);
        if ($config === false || !isset($config['CASSANDRA']) || !is_array($config['CASSANDRA'])) {
            // Fail with a clear message instead of dereferencing false/null below.
            throw new Exception('Unable to read the CASSANDRA section from config.ini.');
        }
        $cassandraConfig = $config['CASSANDRA'];

        $this->_cluster = Cassandra::cluster()
            ->withContactPoints($cassandraConfig['host'])
            ->withPort((int) $cassandraConfig['port'])
            ->withCredentials(
                $cassandraConfig['user'],
                $cassandraConfig['password']
            )
            ->build();

        if ($this->_cluster) {
            try {
                $this->cassandra = $this->_cluster->connect($cassandraConfig['keyspace']);
            } catch (Exception $e) {
                echo "An error occurred: " . $e->getMessage() . "\n";
            }
        }
    }

    /**
     * Runs the script from the command line with the provided arguments.
     *
     * Options are read with getopt(); $arguments is not consulted. Two modes exist:
     *  - deletion mode, when both --remove and --source are given, which hands the
     *    CSV to processAttachmentDeletionCSV();
     *  - consistency mode otherwise, which requires --directory, --version and
     *    --output, runs checkConsistency() and then removes the output directory.
     * The method always terminates the script with exit.
     *
     * @param array $arguments The command line arguments.
     * @return void
     */
    public function runFromCommandLine(array $arguments): void
    {
        $shortOptions = "hd:v:o:r:s:";
        $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"];
        $options = getopt($shortOptions, $longOptions);
        if ($options === false) {
            // getopt() signals a parsing failure with false; treat it like "no options".
            $options = [];
        }

        if (count($options) === 0 || isset($options['h']) || isset($options['help'])) {
            $this->displayHelpMessage();
            exit;
        }

        $directory = $options['directory'] ?? $options['d'] ?? null;
        $schemaVersion = $options['version'] ?? $options['v'] ?? null;
        $source = $options['source'] ?? $options['s'] ?? null;
        $remove = $options['remove'] ?? $options['r'] ?? null;
        $structured_directory = $options['output'] ?? $options['o'] ?? null;

        // Only the combination of --remove and --source selects deletion mode;
        // every other combination ends up in the consistency check and must
        // therefore satisfy its requirements.
        $deletionMode = $remove && $source;

        if ($structured_directory === null) {
            if (!$deletionMode) {
                echo "Output directory is required. Please specify the --output option.\n";
                exit;
            }
        } elseif (!file_exists($structured_directory)) {
            mkdir($structured_directory, 0777, true);
        }
        $this->structured_directory = $structured_directory;

        if (!$deletionMode && ($directory === null || $schemaVersion === null)) {
            echo "Missing Attachment directory or schema version.\n";
            exit;
        }

        // getopt() yields strings, so compare strictly against the string forms.
        // This also rejects "0", which a truthiness test would let through.
        if ($schemaVersion !== null && !in_array($schemaVersion, ['1', '2'], true)) {
            echo "Invalid schema version. Only versions 1 and 2 are supported.\n";
            exit;
        }

        // NOTE(review): in deletion mode without --version this stores 0, which
        // matches neither schema branch exactly - confirm whether --version
        // should be mandatory there as well.
        static::$schemaVersion = (int) $schemaVersion;
        $this->directory = $directory;
        $this->retrived_csv = './result_from_physical_files.csv';

        if ($deletionMode) {
            $this->processAttachmentDeletionCSV($remove, $source);
        } else {
            $this->checkConsistency('attachment_file_info');
            // The per-prefix CSV files are intermediate artefacts; drop them once
            // the reports have been generated.
            if (is_dir($this->structured_directory)) {
                $this->removeDirectory($this->structured_directory);
            }
        }
        exit;
    }

    /**
     * Displays the help message with instructions on how to use the script.
     *
     * @return void
     */
    private function displayHelpMessage(): void
    {
        // NOTE(review): the heredoc body and the head of removeDirectory() are not
        // present in this copy of the file; the remaining fragment is kept verbatim.
        $helpMessage = <<removeDirectory($path);
            } else {
                unlink($path);
            }
        }
        rmdir($directory);
    }

    /**
     * Checks the consistency between database entries and file entries.
     *
     * Connects to Cassandra, dumps the database rows and the physical files into
     * per-prefix CSV files (both calls are made for that side effect only) and
     * then compares those CSV files to produce the reports.
     *
     * @param string $tableName The name of the table in the database to check consistency for.
     * @return void
     */
    public function checkConsistency(string $tableName): void
    {
        $this->init();
        $this->getDbEntries($tableName);
        $this->getFileEntries($this->directory);
        $this->process_files_in_directory($this->structured_directory);
    }

    /**
     * Retrieves file entries from a directory and organizes them based on dynamic values.
     *
     * @param string $directory The directory path to retrieve file entries from.
     * @return array An array containing the file entries organized by dynamic values.
*/
    private function getFileEntries(string $directory): array
    {
        // glob() returns false on error; treat that like an empty directory.
        $files = glob($directory . '/*') ?: [];
        $entries = [];

        foreach ($files as $file) {
            if (!is_file($file)) {
                continue;
            }

            $fileName = basename($file);

            // Thumbnails are not listed on their own.
            if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) {
                continue;
            }

            // Names are either "<client><DELIMITER><id>" or "<client>.<rest>".
            $dashParts = explode(self::DELIMITER, $fileName, 2);
            $dotParts = explode('.', $fileName);

            if (count($dashParts) === 2) {
                $clientId = $dashParts[0];
                $id = $dashParts[1];
            } elseif (count($dotParts) >= 2) {
                $clientId = $dotParts[0];
                $id = implode('.', array_slice($dotParts, 1));
            } else {
                continue;
            }

            // Entries are grouped by the first two characters of the id part.
            $dynamicValue = substr($id, 0, 2);

            $entries[$dynamicValue][] = [
                'client_id' => $clientId,
                'file_path' => $file,
                'file_name' => $fileName,
                'size' => filesize($file),
                'creation_time' => date('Y-m-d H:i:s', filemtime($file)),
            ];
        }

        // Write one CSV per group. Numeric-looking group keys are stored by PHP
        // as integers, so cast them back before passing to a string parameter.
        foreach ($entries as $group => $groupEntries) {
            $this->createPhysicalFileCSV((string) $group, $groupEntries);
        }

        return $entries;
    }

    /**
     * Builds the path of an intermediate CSV file inside the output directory.
     *
     * The directory and file name are always joined with a separator, so the
     * result is the same whether or not the configured output directory ends
     * with a slash. This matches the "<dir>/*.csv" pattern used when the files
     * are read back.
     *
     * @param string $prefix File name prefix, e.g. 'physical_' or 'cassandra_'.
     * @param string $key    Group key appended to the prefix.
     * @return string
     */
    private function buildCsvPath(string $prefix, string $key): string
    {
        $directory = rtrim((string) $this->structured_directory, '/\\');

        return $directory . DIRECTORY_SEPARATOR . $prefix . $key . self::CSV_EXTENSION;
    }

    /**
     * Creates a CSV file containing the physical file entries of one group.
     *
     * The file is named "physical_<key>" plus the CSV extension and holds the
     * columns id, size, creation_time and filename, where id is the file name up
     * to its first dot.
     *
     * @param string $clientId The group key (as passed by getFileEntries(), the
     *                         two-character prefix of the id part).
     * @param array $entries An array containing the physical file entries for the group.
     * @return void
     * @throws Exception When the CSV file cannot be opened for writing.
     */
    private function createPhysicalFileCSV(string $clientId, array $entries): void
    {
        $fileName = $this->buildCsvPath('physical_', $clientId);
        $csvFile = fopen($fileName, 'w');
        if ($csvFile === false) {
            throw new Exception('Failed to open the file for writing: ' . $fileName);
        }

        fputcsv($csvFile, ['id', 'size', 'creation_time', 'filename']);
        foreach ($entries as $entry) {
            $idx = strpos($entry['file_name'], '.');
            $id = $idx === false ? $entry['file_name'] : substr($entry['file_name'], 0, $idx);
            fputcsv($csvFile, [
                $id,
                $entry['size'],
                $entry['creation_time'],
                $entry['file_name']
            ]);
        }
        fclose($csvFile);
    }

    /**
     * Creates a CSV file containing the database entries of one group.
     *
     * The file is named "cassandra_<key>" plus the CSV extension and holds the
     * columns id, size, creation_time, filename, bucket, client_id and
     * attachment_id.
     *
     * @param string $clientId The group key used in the file name.
     * @param array $entries An array containing the entries for the group.
     * @return void
     * @throws Exception When the CSV file cannot be opened for writing.
     */
    private function createDBFileCSV(string $clientId, array $entries): void
    {
        $fileName = $this->buildCsvPath('cassandra_', $clientId);
        $csvFile = fopen($fileName, 'w');
        if ($csvFile === false) {
            throw new Exception('Failed to open the file for writing: ' . $fileName);
        }

        fputcsv($csvFile, ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id']);
        foreach ($entries as $entry) {
            fputcsv($csvFile, [
                $entry['id'],
                $entry['size'],
                $entry['creation_time'],
                $entry['filename'],
                $entry['bucket'],
                $entry['client_id'],
                $entry['attachment_id'],
            ]);
        }
        fclose($csvFile);
    }

    /**
     * Retrieves entries from a database table.
     *
     * Rows whose bucket is 'x' are skipped. Rows with a purely numeric id are
     * grouped by the first two characters after the first dot of their filename
     * and keyed by filename; all other rows are grouped by the first two
     * characters of their id and keyed by "<client_id><DELIMITER><id>". One CSV
     * file is written per group.
     *
     * @param string $tableName The name of the database table.
     * @return array Entries indexed by group key, then by entry key.
     */
    private function getDbEntries(string $tableName): array
    {
        $query = sprintf(
            'SELECT client_id, id, size, filename, created_on%s FROM %s',
            $this->schema_version() === 1 ? '' : ', bucket',
            $tableName
        );

        $result = $this->cassandra->execute(
            $query,
            [
                'arguments' => [],
                'page_size' => static::DEFAULT_PAGE_SIZE
            ]
        );

        $entries = [];
        // Walk the result page by page; the loop ends once nextPage() returns a
        // falsy value.
        while ($result) {
            foreach ($result as $row) {
                // The bucket column is only selected for schema versions other
                // than 1, so it may be absent from the row. Keep '0' intact.
                $bucket = isset($row['bucket']) ? (string) $row['bucket'] : '';
                if ($bucket === 'x') {
                    continue;
                }

                if (preg_match('/^[0-9]+$/', $row['id'])) {
                    $dotParts = explode(".", $row['filename'], 2);
                    $group = substr($dotParts[1] ?? '', 0, 2);
                    $entryKey = $row['filename'];
                    $entryId = $row['id'];
                } else {
                    $group = substr($row['id'], 0, 2);
                    $entryKey = $row['client_id'] . self::DELIMITER . $row['id'];
                    $entryId = $entryKey;
                }

                $entries[$group][$entryKey] = [
                    "id" => $entryId,
                    "size" => (string) $row['size'],
                    "creation_time" => date('Y-m-d H:i:s', (int) $row['created_on']),
                    'filename' => $row['filename'],
                    'bucket' => $bucket,
                    'client_id' => (string) $row['client_id'],
                    'attachment_id' => (string) $row['id'],
                ];
            }
            $result = $result->nextPage();
        }

        // Write one CSV per group; cast integer-like keys back to strings.
        foreach ($entries as $group => $groupEntries) {
            $this->createDBFileCSV((string) $group, $groupEntries);
        }

        return $entries;
    }

    /**
     * Compares a file entry with its corresponding entry in the Cassandra file association.
     *
     * Surrounding double quotes are stripped from both values of each side
     * before they are compared.
     *
     * @param string $id The ID of the file entry.
     * @param array $data An array containing data of the file entry.
     * @param array $cassandra_file_assoc The Cassandra file association.
     * @return array|null An array containing mismatched entries, or null if the entries match.
     */
    private function compareFileEntries(string $id, array $data, array $cassandra_file_assoc): ?array
    {
        $physical_value1 = trim($data[0], '"');
        $physical_value2 = trim($data[1], '"');

        if (!isset($cassandra_file_assoc[$id])) {
            return [
                'id' => $id,
                'file1' => [$id, $physical_value1, $physical_value2],
            ];
        }

        $cassandra_value1 = trim($cassandra_file_assoc[$id][0], '"');
        $cassandra_value2 = trim($cassandra_file_assoc[$id][1], '"');

        if ($physical_value1 !== $cassandra_value1 || $physical_value2 !== $cassandra_value2) {
            return [
                'id' => $id,
                'file1' => [$id, $physical_value1, $physical_value2],
                'file2' => [$id, $cassandra_value1, $cassandra_value2],
            ];
        }

        return null;
    }

    /**
     * Retrieves the lines of a file and returns them as an array.
     *
     * @param string $file The path to the file.
     * @return array An array containing the lines of the file.
*/
    private function getFileLines(string $file): array
    {
        $file_contents = file_get_contents($file);
        if ($file_contents === false) {
            // An unreadable file contributes no lines.
            return [];
        }

        return explode("\n", $file_contents);
    }

    /**
     * Flattens a list of comparison results into a single list of entries.
     *
     * Each element of $entries is the list returned by one compare_csv_files()
     * call. Empty results are dropped and every entry of the remaining results
     * is kept, so all inconsistencies of a CSV pair are reported rather than
     * only the first one.
     *
     * @param array $entries A list of result lists.
     * @return array A flat list of result entries.
     */
    private function filterAndMapEntries(array $entries): array
    {
        $flattened = [];
        foreach ($entries as $resultSet) {
            if (!is_array($resultSet)) {
                continue;
            }
            foreach ($resultSet as $entry) {
                $flattened[] = $entry;
            }
        }

        return $flattened;
    }

    /**
     * Processes files in a directory, performs comparisons, and generates CSV and HTML reports.
     *
     * The directory is expected to hold "physical_<key>" and "cassandra_<key>"
     * CSV files. For every key present on both sides the two files are compared
     * in both directions; for a key present on one side only, every row of that
     * file is reported as missing on the other side.
     *
     * @param string $dir The directory path containing the files to be processed.
     * @return void
     */
    private function process_files_in_directory(string $dir): void
    {
        $files = glob($dir . '/*.csv') ?: [];
        $physical_files = [];
        $cassandra_files = [];

        foreach ($files as $file) {
            $filename = basename($file, static::CSV_EXTENSION);
            $separator = strpos($filename, '_');
            if ($separator === false) {
                continue;
            }
            $csv_type = substr($filename, 0, $separator);
            $file_num = substr($filename, $separator + 1);

            if ($csv_type === 'physical') {
                $physical_files[$file_num] = $file;
            } elseif ($csv_type === 'cassandra') {
                $cassandra_files[$file_num] = $file;
            }
        }

        ksort($physical_files);
        ksort($cassandra_files);

        $compared_physical = [];
        $compared_cassandra = [];
        $missing_physical_files = [];
        $missing_cassandra_entries = [];

        foreach ($physical_files as $file_num => $physical_file) {
            $cassandra_file = $dir . '/cassandra_' . $file_num . self::CSV_EXTENSION;

            if (file_exists($cassandra_file)) {
                $compared_physical[] = $this->compare_csv_files($physical_file, $cassandra_file);
                $compared_cassandra[] = $this->compare_csv_files($cassandra_file, $physical_file);
                continue;
            }

            // No database CSV for this key: every physical row is unmatched.
            $file_assoc = $this->buildFileAssociation($this->getFileLines($physical_file));
            foreach ($file_assoc as $id => $data) {
                $missing_cassandra_entries[] = [
                    'id' => $id,
                    'file1' => array_merge([$id], $data),
                ];
            }
        }

        foreach ($cassandra_files as $file_num => $cassandra_file) {
            $physical_file = $dir . '/physical_' . $file_num . self::CSV_EXTENSION;
            if (file_exists($physical_file)) {
                continue;
            }

            // No physical CSV for this key: every database row is unmatched.
            $file_assoc = $this->buildFileAssociation($this->getFileLines($cassandra_file));
            foreach ($file_assoc as $id => $data) {
                $missing_physical_files[] = [
                    'id' => $id,
                    'file1' => array_merge([$id], $data),
                ];
            }
        }

        // Physical rows without a matching database row feed the "cassandra"
        // reports; database rows without a matching file feed the "physical" ones.
        $cassandra_entries = $this->filterAndMapEntries($compared_physical);
        $physical_entries = $this->filterAndMapEntries($compared_cassandra);

        $result_from_cassandra_entries = array_unique(
            array_merge($cassandra_entries, $missing_cassandra_entries),
            SORT_REGULAR
        );
        $result_from_physical_files = array_unique(
            array_merge($physical_entries, $missing_physical_files),
            SORT_REGULAR
        );

        $this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV);
        $this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT);

        $this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV);
        $this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT);
    }

    /**
     * Builds an associative array from file lines.
     *
     * The first line is treated as the header and skipped. Each remaining line
     * is parsed as CSV; rows with exactly NUM_FIELDS columns or more than three
     * columns are indexed by their first column and mapped to up to six of the
     * following columns, in file order.
     *
     * @param array $file_lines An array containing lines of a file.
     * @return array An associative array representing the file association.
     */
    private function buildFileAssociation(array $file_lines): array
    {
        $file_assoc = [];

        foreach (array_slice($file_lines, 1) as $line) {
            $line = rtrim($line, "\r");
            if ($line === '') {
                continue;
            }

            // The files are written with fputcsv(), so parse them as CSV rather
            // than splitting on commas; this also removes the field quoting.
            $values = str_getcsv($line);
            $columns = count($values);

            if ($columns === self::NUM_FIELDS || $columns > 3) {
                $file_assoc[$values[0]] = array_slice($values, 1, 6);
            }
        }

        return $file_assoc;
    }

    /**
     * Compares two CSV files and returns missing entries or entries with mismatched data.
     *
     * @param string $file1_path The file path of the first CSV file.
     * @param string $file2_path The file path of the second CSV file.
     * @return array An array containing missing entries or entries with mismatched data.
*/
    private function compare_csv_files(string $file1_path, string $file2_path): array
    {
        // 'file1' layout: [0] id, [1] size, [2] creation_time, [3] filename,
        // [4] bucket, [5] client_id, [6] attachment_id. Positions 4-6 follow the
        // order generateCsvReportForPhysicalFiles() reads them in; columns that a
        // file does not have are null.
        $file1_assoc = $this->readCsvIndexedById(
            $file1_path,
            ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id']
        );
        $file2_assoc = $this->readCsvIndexedById(
            $file2_path,
            ['id', 'size', 'creation_time', 'filename']
        );

        $missing_entries = [];
        foreach ($file1_assoc as $id => $file1_row) {
            if (!isset($file2_assoc[$id])) {
                $missing_entries[] = [
                    'id' => $id,
                    'file1' => $file1_row,
                ];
                continue;
            }

            $file2_row = $file2_assoc[$id];
            // Only id and size are compared; timestamps are carried along for
            // the report but do not make an entry inconsistent.
            if ($file1_row[0] !== $file2_row[0] || $file1_row[1] !== $file2_row[1]) {
                $missing_entries[] = [
                    'id' => $id,
                    'file1' => $file1_row,
                    'file2' => $file2_row,
                ];
            }
        }

        return $missing_entries;
    }

    /**
     * Reads a CSV file with a header row and indexes its rows by the 'id' column.
     *
     * @param string $path    Path of the CSV file.
     * @param array  $columns Header names to extract, in output order; the first
     *                        one is the key column.
     * @return array Map of id => list of values in $columns order, with null for
     *               columns missing from the file or the row. Empty when the file
     *               is unreadable, empty, or lacks the key column.
     */
    private function readCsvIndexedById(string $path, array $columns): array
    {
        $lines = file($path);
        if ($lines === false || count($lines) === 0) {
            return [];
        }

        $rows = array_map('str_getcsv', $lines);
        $headers = array_shift($rows);

        // array_search() returns false for an absent header; keep that marker so
        // it is never used as an index (false would silently address column 0).
        $indexes = [];
        foreach ($columns as $column) {
            $indexes[] = array_search($column, $headers, true);
        }

        $keyIndex = $indexes[0];
        if ($keyIndex === false) {
            return [];
        }

        $indexed = [];
        foreach ($rows as $row) {
            if (!isset($row[$keyIndex])) {
                continue;
            }

            $values = [];
            foreach ($indexes as $index) {
                $values[] = ($index !== false && isset($row[$index])) ? $row[$index] : null;
            }
            $indexed[$row[$keyIndex]] = $values;
        }

        return $indexed;
    }

    /**
     * Generates a CSV report of database entries that have no consistent physical file.
     *
     * Every row is written with source type 'Attachment' followed by path, name,
     * thumbnails, size, creation time, client id, bucket and attachment id.
     *
     * @param array $inconsistentFiles An array of inconsistent entries.
     * @param string $name The path of the report file to write.
     * @return void
     * @throws Exception When the report file cannot be opened for writing.
     */
    private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void
    {
        $fp = fopen($name, 'w');
        if ($fp === false) {
            throw new Exception('Failed to open the file for writing.');
        }

        fputcsv($fp, [
            self::CSV_COLUMN_FILE_ATTACHMENT,
            self::CSV_COLUMN_FILE_PATH,
            self::CSV_COLUMN_FILE_NAME,
            self::CSV_COLUMN_THUMB1,
            self::CSV_COLUMN_THUMB2,
            self::CSV_COLUMN_SIZE,
            self::CSV_COLUMN_CREATION_TIME,
            self::CSV_COLUMN_CLIENT_ID,
            self::CSV_COLUMN_BUCKET,
            self::CSV_COLUMN_ID
        ]);

        foreach ($inconsistentFiles as $row) {
            $file1 = $row['file1'] ?? [];

            $check_value = $row['id'];
            $filePath = $check_value ? $check_value : ($row['file2'][0] ?? '');

            // Entries with a numeric id are identified by their filename instead.
            if (is_numeric($row['id'])) {
                $filePath = $file1[3] ?? '';
                $check_value = $filePath;
            }

            $size = isset($file1[1]) ? (string) $file1[1] : filesize($filePath);
            $creationTime = isset($file1[2])
                ? (string) $file1[2]
                : date('Y-m-d H:i:s', filectime($filePath));
            $thumb1 = $row['thumb1'] ?? '';
            $thumb2 = $row['thumb2'] ?? '';
            $bucket = $file1[4] ?? '';
            $clientId = $file1[5] ?? '';
            $attachmentId = $file1[6] ?? '';

            fputcsv($fp, [
                'Attachment',
                $filePath,
                $check_value,
                $thumb1,
                $thumb2,
                $size,
                $creationTime,
                $clientId,
                $bucket,
                $attachmentId,
            ]);
        }

        fclose($fp);
        chmod($name, 0666);
    }

    /**
     * Generates a CSV report of physical files that have no consistent database entry.
     *
     * Every row is written with source type 'File' followed by the file path
     * below $this->directory, the entry id, both thumbnail names, size and
     * creation time. Thumbnail names are left empty when the id contains a dot.
     *
     * @param array $inconsistentFiles An array containing inconsistent file data.
     * @param string $name The name of the CSV report file to be generated.
     * @return void
     * @throws Exception When the report file cannot be opened for writing.
     */
    private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void
    {
        $fp = fopen($name, 'w');
        if ($fp === false) {
            throw new Exception('Failed to open the file for writing.');
        }

        fputcsv($fp, [
            self::CSV_COLUMN_FILE_ATTACHMENT,
            'Entry Path',
            'Entry Name',
            self::CSV_COLUMN_THUMB1,
            self::CSV_COLUMN_THUMB2,
            self::CSV_COLUMN_SIZE,
            self::CSV_COLUMN_CREATION_TIME
        ]);

        foreach ($inconsistentFiles as $row) {
            $file1 = $row['file1'] ?? [];

            // Prefer the filename recorded for file1 and fall back to file2's.
            // The choice is made on the filename alone: '.' binds tighter than
            // '?:', so testing the concatenated path would always be truthy.
            $entryName = $file1[3] ?? '';
            if ($entryName === '') {
                $entryName = $row['file2'][3] ?? '';
            }
            $filePath = $this->directory . DIRECTORY_SEPARATOR . $entryName;

            $size = isset($file1[1]) ? (string) $file1[1] : filesize($filePath);
            $creationTime = isset($file1[2])
                ? (string) $file1[2]
                : date('Y-m-d H:i:s', filectime($filePath));

            if (is_string($row['id']) && strpos($row['id'], '.') !== false) {
                $thumb1 = '';
                $thumb2 = '';
            } else {
                $thumb1 = ($file1[3] ?? '') . '-thumb1';
                $thumb2 = ($file1[3] ?? '') . '-thumb2';
            }

            fputcsv($fp, [
                'File',
                $filePath,
                $file1[0] ?? '',
                $thumb1,
                $thumb2,
                $size,
                $creationTime
            ]);
        }

        fclose($fp);
        chmod($name, 0666);
    }

    /**
     * Generates an HTML report of inconsistent files.
     *
     * @param array $inconsistentFiles An array of inconsistent files.
     * @param string $name The filename to use for the report.
*/
    private function generateHtmlReport(array $inconsistentFiles, string $name): void
    {
        $templateFile = 'report_template.html';
        $templateContent = file_get_contents($templateFile);
        if ($templateContent === false) {
            throw new Exception('Failed to read the HTML template file.');
        }

        $tableRows = '';
        foreach ($inconsistentFiles as $row) {
            $file1 = $row['file1'] ?? [];

            $filename = (string) ($file1[0] ?? '');
            $thumb1 = $filename . '-thumb1';
            $thumb2 = $filename . '-thumb2';

            // Names of the form "<digits>.<rest>" and ids containing a dot are
            // listed under that name and without thumbnails.
            if (isset($file1[3]) && preg_match('/^[0-9]+\./', $file1[3])) {
                $filename = $file1[3];
                $thumb1 = '';
                $thumb2 = '';
            }
            if (is_string($row['id']) && strpos($row['id'], '.') !== false) {
                $filename = $row['id'];
                $thumb1 = '';
                $thumb2 = '';
            }

            $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename;
            $size = isset($file1[1]) ? (string) $file1[1] : (string) filesize($filePath);
            $creationTime = isset($file1[2])
                ? str_replace('"', '', $file1[2])
                : date('Y-m-d H:i:s', filectime($filePath));

            // One table row per entry, one escaped cell per value, so the result
            // is valid content for the template's {{table_rows}} placeholder.
            $cells = ['Attachment', $filePath, $filename, $thumb1, $thumb2, $size, $creationTime];
            $tableRows .= '<tr>';
            foreach ($cells as $cell) {
                $tableRows .= '<td>' . htmlspecialchars((string) $cell) . '</td>';
            }
            $tableRows .= '</tr>';
        }

        $html = str_replace('{{table_rows}}', $tableRows, $templateContent);

        $file = fopen($name, 'w');
        if (!$file) {
            throw new Exception('Failed to open the file for writing.');
        }
        fwrite($file, $html);
        fclose($file);
    }

    /**
     * Returns cassandra schema version
     *
     * @return int
     */
    private function schema_version(): int
    {
        return static::$schemaVersion;
    }

    /**
     * Returns info(id,size) for attachment file
     *
     * For schema versions other than 1 the lookup is additionally restricted to
     * the given bucket. All values are bound as statement arguments.
     *
     * @param int $clientId
     * @param string|null $bucketId
     * @param string $id
     * @return object|null The row as an object (id cast to string, size cast to
     *                     int when present), or null when no row matches.
     */
    private function get_info(int $clientId, ?string $bucketId, string $id): ?object
    {
        $args = [
            'client_id' => $clientId,
            'id' => $id
        ];
        $cql = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?';

        if ($this->schema_version() != 1) {
            // Bind the bucket instead of concatenating it into the statement; a
            // null bucket is sent as '' as before.
            $cql .= ' AND bucket = ?';
            $args['bucket'] = (string) $bucketId;
        }

        $res = $this->cassandra->execute(
            $this->cassandra->prepare($cql),
            ['arguments' => $args]
        );

        if (!$res || !$res->valid()) {
            return null;
        }

        $tmp = $res->current();
        $attachment = (object) $tmp;
        $attachment->id = (string) $tmp['id'];
        if (array_key_exists('size', $tmp)) {
            $attachment->size = (int) $tmp['size'];
        }

        return $attachment;
    }

    /**
     * prepares 'bucket' field for partitioning
     *
     * @param string $id
     * @return string The first $bucketMagic characters of the id.
     */
    private function _set_bucket(string $id): string
    {
        return substr($id, 0, static::$bucketMagic);
    }

    /**
     * updates attachment count and size
     *
     * @param bool $add  true to increase the counters, false to decrease them
     * @param int  $size number of bytes to add to or subtract from the size counter
     *
     * @return void
     */
    private function _update_attachment_stats(bool $add, int $size): void
    {
        // Arithmetic operator of the counter update. Spelled out here rather than
        // borrowed from the filename delimiter constant.
        $op = $add ? '+' : '-';
        $arguments = ['arguments' => ['client_id' => (int) static::$clientId]];

        $query = $this->cassandra->prepare(
            'UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?'
        );
        $this->cassandra->execute($query, $arguments);

        $query = $this->cassandra->prepare(
            'UPDATE attachment_stats SET size = size ' . $op . ' ' . $size . ' where client_id = ?'
        );
        $this->cassandra->execute($query, $arguments);
    }

    /**
     * returns full attachment table key for given id
     *
     * @param string $id
     *
     * @return object|null The attachment_ids row as an object, or null when absent.
     */
    private function _get_attachment_key(string $id): ?object
    {
        $query = $this->cassandra->prepare(
            'SELECT * from attachment_ids where id = ? AND bucket = ? AND client_id = ?'
        );
        $arguments = [
            'client_id' => static::$clientId,
            'bucket' => $this->_set_bucket($id),
            'id' => $id,
        ];

        $data = $this->cassandra->execute($query, ['arguments' => $arguments]);
        if ($data && $data->valid()) {
            return (object) $data->current();
        }

        return null;
    }

    /**
     * returns single attachment data for provided ID
     *
     * @param string $attachmentId
     *
     * @return object|null The attachment, or null when its key or row is missing
     *                     or the driver rejects the arguments.
     */
    private function _get_attachment_by_id(string $attachmentId): ?object
    {
        $attachment = null;

        try {
            $key = $this->_get_attachment_key($attachmentId);
            if ($key) {
                $cql = 'SELECT * FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?';
                $arguments = [
                    'client_id' => static::$clientId,
                    'id' => $key->id,
                    'project_id' => $key->project_id,
                    'entity_type' => $key->entity_type,
                ];
                if ($this->schema_version() !== 1) {
                    $cql .= ' AND entity_id = ?';
                    $arguments['entity_id'] = $key->entity_id;
                }

                $attachment = $this->cassandra->execute(
                    $this->cassandra->prepare($cql),
                    ['arguments' => $arguments]
                );
            }
        } catch (Cassandra\Exception\InvalidArgumentException $e) {
            // Arguments the driver rejects are treated the same as "not found":
            // $attachment stays null and null is returned below.
        }

        return $attachment != null && $attachment->valid()
            ? $this->_convert_to_object($attachment->current())
            : null;
    }

    /**
     * deletes attachment_ids entry
     *
     * @param string $id
     */
    private function _delete_attachment_key(string $id): void
    {
        $this->cassandra->executeAsync(
            $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?'),
            [
                'arguments' => [
                    'client_id' => static::$clientId,
                    'bucket' => $this->_set_bucket($id),
                    'id' => $id
                ]
            ]
        );
    }

    /**
     * updates attachment data references
     *
     * Adjusts the reference counter of the data id by one and, when an
     * attachment id is given, adds or removes the matching attachment_file_ids row.
     *
     * @param string $data_id
     * @param bool $add
     * @param string $attachmentId
     *
     * @return void
     */
    private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void
    {
        $queryArguments = [
            'client_id' => static::$clientId,
            'bucket' => $this->_set_bucket($data_id),
            'id' => $data_id,
        ];

        $query = $this->cassandra->prepare(
            'UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : '-')
            . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?'
        );
        $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]);

        if (empty($attachmentId)) {
            return;
        }

        $queryArguments['attachment_id'] = $attachmentId;
        if ($add) {
            $query = $this->cassandra->prepare(
                'INSERT INTO attachment_file_ids (client_id,bucket,id,attachment_id) VALUES(?,?,?,?)'
            );
        } else {
            $query = $this->cassandra->prepare(
                'DELETE FROM attachment_file_ids WHERE client_id = ? AND bucket = ? AND attachment_id = ? AND id = ?'
            );
        }
        $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]);
    }

    /**
     * Deletes attachment from Cassandra
     *
     * Removes the file info row, its reference counter, every attachment that
     * points at the file, and finally an attachment stored under the same id.
     *
     * @param int $clientId
     * @param string|null $bucketId
     * @param string $id
     *
     * @return bool false when no file info exists for the id; otherwise true only
     *              when an attachment stored under $id itself was found and deleted.
     */
    public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool
    {
        static::$clientId = $clientId;

        $fileInfo = $this->get_info($clientId, $bucketId, $id);
        if (!$fileInfo) {
            return false;
        }
        $this->_update_attachment_stats(false, (int) ($fileInfo->size ?? 0));

        // Remove the file info row; the bucket is bound rather than concatenated.
        $infoArgs = [
            'client_id' => $clientId,
            'id' => $id,
        ];
        $q = 'DELETE FROM attachment_file_info WHERE id = ? AND client_id = ?';
        if ($this->schema_version() === 2) {
            $q .= ' AND bucket = ?';
            $infoArgs['bucket'] = (string) $bucketId;
        }
        $this->cassandra->execute($this->cassandra->prepare($q), ['arguments' => $infoArgs]);

        // The reference tables are partitioned by the bucket derived from the id.
        $refData = [
            'client_id' => $clientId,
            'id' => $id,
            'bucket' => $this->_set_bucket($id),
        ];

        $query = $this->cassandra->prepare(
            'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?'
        );
        $this->cassandra->execute($query, ['arguments' => $refData]);

        // get all attachments with deleted file and remove them
        $query = $this->cassandra->prepare(
            'SELECT attachment_id FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
        );
        $result = $this->cassandra->execute($query, ['arguments' => $refData]);

        $isSchemaV1 = $this->schema_version() === 1;
        $delCql = 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?';
        if (!$isSchemaV1) {
            $delCql .= ' AND entity_id = ?';
        }
        $delQuery = $this->cassandra->prepare($delCql);

        while ($result && $result->valid()) {
            $attachmentId = $result->current()['attachment_id'];
            $key = $this->_get_attachment_key((string) $attachmentId);

            if ($key != null) {
                $delArgs = [
                    'client_id' => static::$clientId,
                    'project_id' => $key->project_id,
                    'entity_type' => $key->entity_type,
                    'id' => $attachmentId,
                ];
                if (!$isSchemaV1) {
                    $delArgs['entity_id'] = $key->entity_id;
                }

                $this->_delete_attachment_key((string) $attachmentId);
                $this->cassandra->execute($delQuery, ['arguments' => $delArgs]);
            }

            $result->next();
        }

        $query = $this->cassandra->prepare(
            'DELETE FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
        );
        $this->cassandra->execute($query, ['arguments' => $refData]);

        // Finally remove an attachment that is stored under the same id, if any.
        $attachment = $this->_get_attachment_by_id($id);
        if (!$attachment) {
            return false;
        }

        $arguments = [
            'client_id' => static::$clientId,
            'id' => $id,
            'project_id' => $attachment->project_id,
            'entity_type' => $attachment->entity_type,
        ];
        if ($isSchemaV1) {
            $cql = 'DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_type = ? AND client_id = ?';
        } else {
            $cql = 'DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_id = ? AND entity_type = ? AND client_id = ?';
            $arguments['entity_id'] = $attachment->entity_id;
        }

        $deleted = $this->cassandra->execute(
            $this->cassandra->prepare($cql),
            ['arguments' => $arguments]
        ) != null;

        if ($deleted) {
            $this->_update_file_refs($attachment->data_id, false, $attachment->id);
            $this->_delete_attachment_key($id);
        }

        return $deleted;
    }

    /**
     * Deletes attachments provided in file
     *
     * The file is a CSV report as written by this class: the first line is the
     * header and is skipped. Each following row starts with the source type
     * ('Attachment' or 'File'), then path, name, thumb1, thumb2, size and
     * creation time; 'Attachment' rows additionally carry client id, bucket and id.
     * Rows of type 'Attachment' are processed when $src is 'cassandra', rows of
     * type 'File' when $src is 'file'. Everything deleted is appended to
     * deleted_files.log.
     *
     * @param string $file
     * @param string $src ('cassandra'/'file')
     *
     * @return void
     * @throws Exception When the log file cannot be opened.
     *
     * example rows:
     *
     * Attachment,/test1/testx,testx,testx-thumb1,testx-thumb2,4343,20-02-22 13:30,1,a,abc-def
     * Attachment,/test1/testx1,testx1,testx1-thumb1,testx1-thumb2,4343,20-02-22 13:30,1,1,1abc-def
     * File,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30
     */
    public function processAttachmentDeletionCSV(string $file, string $src): void
    {
        echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL;
        echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL;
        echo "Do you want to delete the physical files listed in the CSV report? (yes/no): ";

        // fgets() returns false on EOF; treat that as "no".
        $confirmation = trim((string) fgets(STDIN));
        if (strtolower($confirmation) !== 'yes') {
            return;
        }

        $this->init();

        $file_contents = file_get_contents($file);
        if ($file_contents === false) {
            echo "Unable to read CSV file: $file" . PHP_EOL;
            return;
        }
        $file_lines = explode("\n", $file_contents);

        $logFile = 'deleted_files.log';
        $logHandle = fopen($logFile, 'a');
        if ($logHandle === false) {
            throw new Exception('Failed to open the file for writing.');
        }

        foreach ($file_lines as $line) {
            // Skip the header line (and any repetition of it).
            if ($line === $file_lines[0]) {
                continue;
            }

            // The report is written with fputcsv(), so parse it as CSV; this
            // also strips the quotes around fields such as the creation time.
            $values = str_getcsv(rtrim($line, "\r"));
            if ($values[0] === 'File/Attachment' || count($values) < 7) {
                continue;
            }

            $sourceType = $values[0];

            if ($sourceType === 'Attachment' && $src === 'cassandra') {
                if (count($values) < 10) {
                    // Client id, bucket and id are required to address the row.
                    echo "Skipping incomplete attachment row: $line" . PHP_EOL;
                    continue;
                }

                $clientId = (int) $values[7];
                $bucket = $values[8];
                $id = $values[9];

                echo "will delete " . $clientId . " : " . $bucket . " : " . $id . PHP_EOL;
                $this->deleteAttachment($clientId, $bucket, $id);
                fwrite($logHandle, "Deleted attachment: $id" . PHP_EOL);
            } elseif ($sourceType === 'File' && $src === 'file') {
                $filePath = $values[1];
                $directory = dirname($filePath);

                // Remove each thumbnail at most once. is_file() keeps an empty
                // thumbnail name from matching the containing directory.
                foreach ([$values[3], $values[4]] as $thumbName) {
                    if ($thumbName === null || $thumbName === '') {
                        continue;
                    }
                    $thumbPath = $directory . DIRECTORY_SEPARATOR . $thumbName;
                    if (is_file($thumbPath) && unlink($thumbPath)) {
                        fwrite($logHandle, "Deleted thumbnail: " . $thumbPath . PHP_EOL);
                    }
                }

                if (!is_file($filePath)) {
                    echo "File not found: $filePath" . PHP_EOL;
                } elseif (unlink($filePath)) {
                    echo "File deleted: $filePath" . PHP_EOL;
                    // Write the deleted file path to the log file
                    fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL);
                } else {
                    echo "Failed to delete file: $filePath" . PHP_EOL;
                }
            }
        }

        fclose($logHandle);
        echo "Deletion completed. The list of attachments is saved in '$logFile'." . PHP_EOL;
    }
}

$checker = new DataConsistencyChecker();
// checkConsistency() takes the table name only.
$checker->checkConsistency("attachment_file_info");