<?php
class DataConsistencyChecker
{
/** Number of rows requested per page when reading from Cassandra. */
const DEFAULT_PAGE_SIZE = 30;

/** Cluster handle built in init(). */
private $_cluster;

/** Not referenced in the visible part of the class. */
private $session;

/** Session returned by connect() in init(); all queries run through it. */
private $cassandra;

/** Directory that holds the physical attachment files. */
private $directory;

/** Working directory for the intermediate physical_*.csv / cassandra_*.csv files. */
private $structured_directory;

/** Path of the physical-files report (property spelling kept for compatibility). */
private $retrived_csv;

/** Client id bound when attachment statistics are updated. */
public static $clientId;

/** Cassandra schema version selected on the command line. */
private static $schemaVersion = 1;

/** Number of leading id characters used as the partition bucket. */
public static $bucketMagic = 4;

/** Host name default; not referenced in the visible part of the class. */
public static $cassandraHost = 'cassandra';
/**
 * Builds the checker and immediately hands control to the CLI workflow.
 *
 * @param string|null $directory Optional attachment directory, stored on the
 *                               instance before the arguments are processed.
 */
public function __construct($directory = null)
{
    // A null argument leaves the property at its current value.
    $this->directory = $directory ?? $this->directory;

    $argv = $_SERVER['argv'];
    $this->runFromCommandLine($argv);
}
/**
 * Initializes the Cassandra connection from config.ini.
 *
 * Reads the [CASSANDRA] section (host, user, password, keyspace and an
 * optional port that defaults to 9042), builds the cluster and stores the
 * connected session in $this->cassandra.
 *
 * @return void
 * @throws RuntimeException If the configuration cannot be read, a required
 *                          key is missing, or the connection fails.
 */
public function init(): void
{
    $config = parse_ini_file('config.ini', true);
    if ($config === false || !isset($config['CASSANDRA']) || !is_array($config['CASSANDRA'])) {
        throw new RuntimeException('Unable to read the [CASSANDRA] section from config.ini.');
    }

    $settings = $config['CASSANDRA'];
    foreach (['host', 'user', 'password', 'keyspace'] as $key) {
        if (!isset($settings[$key])) {
            throw new RuntimeException("Missing '{$key}' in the [CASSANDRA] section of config.ini.");
        }
    }

    // The port used to be hard-coded; 9042 stays the default.
    $port = isset($settings['port']) ? (int) $settings['port'] : 9042;

    $this->_cluster = Cassandra::cluster()
        ->withContactPoints($settings['host'])
        ->withPort($port)
        ->withCredentials($settings['user'], $settings['password'])
        ->build();

    try {
        $this->cassandra = $this->_cluster->connect($settings['keyspace']);
    } catch (Exception $e) {
        // Carrying on without a session only fails later with a
        // "call to a member function on null" error, so report the cause here.
        throw new RuntimeException('Unable to connect to Cassandra: ' . $e->getMessage(), 0, $e);
    }
}
/**
 * Runs the script from the command line.
 *
 * Options are parsed with getopt(), which reads the process arguments on its
 * own; $arguments is accepted for interface compatibility only.
 *
 * Two modes exist:
 *  - deletion: --remove and --source are both given and the call is
 *    forwarded to processAttachmentDeletionCSV();
 *  - consistency check: --directory and --version are required; the working
 *    directory for intermediate CSV files is removed afterwards.
 *
 * The method always terminates the process.
 *
 * @param array $arguments The command line arguments (unused, see above).
 * @return void
 */
public function runFromCommandLine($arguments)
{
    // Short aliases belong in the short-option string only; listing "v", "o",
    // "r" and "s" as long options made "--v" a value-less flag.
    $options = getopt(
        'hd:v:o:r:s:',
        ['help', 'directory:', 'version:', 'output:', 'remove:', 'source:']
    );
    if (!is_array($options) || count($options) === 0 || isset($options['h']) || isset($options['help'])) {
        $this->displayHelpMessage();
        exit;
    }

    $directory = $options['directory'] ?? $options['d'] ?? null;
    $schemaVersion = $options['version'] ?? $options['v'] ?? null;
    $source = $options['source'] ?? $options['s'] ?? null;
    $remove = $options['remove'] ?? $options['r'] ?? null;
    $output = $options['output'] ?? $options['o'] ?? null;

    // Keep a directory injected through the constructor unless it is overridden.
    if ($directory === null) {
        $directory = $this->directory;
    }

    $deletionRequested = $remove !== null || $source !== null;
    if ($deletionRequested && ($remove === null || $source === null)) {
        echo "Both --remove and --source are required to delete entries.\n";
        exit(1);
    }
    if (!$deletionRequested && ($directory === null || $schemaVersion === null)) {
        echo "Missing Attachment directory or schema version.\n";
        exit(1);
    }
    // Strict string comparison: a loose in_array() accepts values such as "1abc".
    if ($schemaVersion !== null && !in_array((string) $schemaVersion, ['1', '2'], true)) {
        echo "Invalid schema version. Only versions 1 and 2 are supported.\n";
        exit(1);
    }

    if ($output === null || $output === '') {
        // The working directory is wiped after a consistency check, so it must
        // never default to the current directory, which also receives the reports.
        $output = $deletionRequested
            ? './'
            : sys_get_temp_dir() . '/data_consistency_' . getmypid();
    }
    // File names are appended directly to this path, so force one trailing slash.
    $output = rtrim($output, '/\\') . '/';
    if (!is_dir($output) && !mkdir($output, 0777, true) && !is_dir($output)) {
        echo "Unable to create output directory: {$output}\n";
        exit(1);
    }

    $this->structured_directory = $output;
    if ($schemaVersion !== null) {
        // Leave the default untouched when no version was passed (deletion mode).
        static::$schemaVersion = (int) $schemaVersion;
    }
    $this->directory = $directory;
    $this->retrived_csv = './result_from_physical_files.csv';

    if ($deletionRequested) {
        $this->processAttachmentDeletionCSV($remove, $source);
    } else {
        $this->checkConsistency('attachment_file_info');

        // Never remove the current working directory, even if it was passed explicitly.
        $workDir = realpath($this->structured_directory);
        $cwd = getcwd();
        if ($workDir !== false && ($cwd === false || $workDir !== realpath($cwd))) {
            $this->removeDirectory($this->structured_directory);
        }
    }

    exit;
}
/**
 * Prints the usage text, covering every option accepted by
 * runFromCommandLine() in both its long and short form.
 *
 * @return void
 */
private function displayHelpMessage(): void
{
    // The closing marker stays in column 0 so the heredoc also parses before PHP 7.3.
    $helpMessage = <<<EOT
Usage:
  php script.php [options]

Options:
  -h, --help        Display this help screen.
  -v, --version     Set the schema version (1 or 2).
  -d, --directory   Set the directory path for attachments.
  -o, --output      Set the folder for temp files.
  -r, --remove      CSV report listing the entries to remove.
  -s, --source      What to remove: "file" or "cassandra".

Example:
  php script_name --version schema_version --directory=/path/to/directory --output ./out/

For Delete:
  php script_name --remove result_from_cassandra_entries.csv --source file        - to remove missing physical files
  php script_name --remove result_from_physical_files.csv --source cassandra     - to remove missing cassandra entries
EOT;

    echo $helpMessage . PHP_EOL;
}
/**
 * Recursively removes a directory and its contents.
 *
 * Symbolic links found inside the tree are unlinked rather than followed, so
 * nothing outside $directory is deleted.
 *
 * @param string $directory The directory path to be removed.
 * @return void
 */
private function removeDirectory($directory): void
{
    if (!is_dir($directory)) {
        return;
    }

    $names = scandir($directory);
    if ($names === false) {
        // Unreadable directory: nothing can be removed safely.
        return;
    }

    foreach (array_diff($names, ['.', '..']) as $name) {
        $path = $directory . '/' . $name;
        if (is_dir($path) && !is_link($path)) {
            $this->removeDirectory($path);
        } else {
            unlink($path);
        }
    }

    rmdir($directory);
}
/**
 * Checks the consistency between database entries and file entries.
 *
 * Connects to Cassandra, exports the table rows and the physical files and
 * then processes the working directory to produce the reports.
 *
 * @param string $tableName The name of the table in the database to check consistency for.
 * @return void
 */
public function checkConsistency($tableName)
{
    $this->init();

    // The return values of both exports are not used in this method.
    $this->getDbEntries($tableName);
    $this->getFileEntries($this->directory);

    $this->process_files_in_directory($this->structured_directory);
}
/**
 * Collects the attachment files of a directory, grouped by id prefix.
 *
 * File names are split into "<client>-<id>" or, failing that,
 * "<client>.<id>"; the first two characters of <id> form the group key.
 * Thumbnails ("-thumb1" / "-thumb2") and names matching neither pattern are
 * skipped. One physical_<key>.csv file is written per group.
 *
 * @param string $directory The directory path to retrieve file entries from.
 * @return array Entries keyed by group, each with client_id, file_path,
 *               file_name, size and creation_time (taken from filemtime()).
 */
private function getFileEntries($directory): array
{
    $paths = glob($directory . '/*');
    if ($paths === false) {
        $paths = [];
    }

    $entries = [];
    foreach ($paths as $filePath) {
        if (!is_file($filePath)) {
            continue;
        }

        $fileName = basename($filePath);
        if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) {
            continue;
        }

        $parts = explode('-', $fileName, 2);
        if (count($parts) !== 2) {
            // Fall back to the "<client>.<rest>" layout.
            $parts = explode('.', $fileName, 2);
            if (count($parts) !== 2) {
                continue;
            }
        }
        list($clientId, $id) = $parts;

        $groupKey = (string) substr($id, 0, 2);
        $entries[$groupKey][] = [
            'client_id' => $clientId,
            'file_path' => $filePath,
            'file_name' => $fileName,
            'size' => filesize($filePath),
            'creation_time' => date('Y-m-d H:i:s', filemtime($filePath)),
        ];
    }

    // One CSV file per group of physical files.
    foreach ($entries as $groupKey => $groupEntries) {
        $this->createPhysicalFileCSV($groupKey, $groupEntries);
    }

    return $entries;
}
/**
 * Writes physical_<key>.csv into the working directory.
 *
 * @param string $clientId Group key used in the file name (the caller passes
 *                         the first two characters of the file id).
 * @param array  $entries  Physical file entries; file_name, size and
 *                         creation_time are written for each.
 * @return void
 * @throws RuntimeException If the CSV file cannot be opened for writing.
 */
private function createPhysicalFileCSV($clientId, $entries)
{
    // Works with or without a trailing slash on the working directory.
    $dir = (string) $this->structured_directory;
    $prefix = $dir === '' ? '' : rtrim($dir, '/\\') . '/';
    $fileName = $prefix . 'physical_' . $clientId . '.csv';

    $csvFile = fopen($fileName, 'w');
    if ($csvFile === false) {
        throw new RuntimeException("Unable to open {$fileName} for writing.");
    }

    fputcsv($csvFile, ['id', 'size', 'creation_time']);
    foreach ($entries as $entry) {
        fputcsv($csvFile, [
            $entry['file_name'],
            $entry['size'],
            $entry['creation_time'],
        ]);
    }

    fclose($csvFile);
}
/**
 * Writes cassandra_<key>.csv into the working directory.
 *
 * @param string $clientId Group key used in the file name (the caller passes
 *                         the two-character id prefix).
 * @param array  $entries  Database entries with the keys id, size,
 *                         creation_time, filename, bucket, client_id and
 *                         attachment_id.
 * @return void
 * @throws RuntimeException If the CSV file cannot be opened for writing.
 */
private function createDBFileCSV($clientId, $entries): void
{
    // Works with or without a trailing slash on the working directory.
    $dir = (string) $this->structured_directory;
    $prefix = $dir === '' ? '' : rtrim($dir, '/\\') . '/';
    $fileName = $prefix . 'cassandra_' . (string) $clientId . '.csv';

    $csvFile = fopen($fileName, 'w');
    if ($csvFile === false) {
        throw new RuntimeException("Unable to open {$fileName} for writing.");
    }

    $headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id'];
    fputcsv($csvFile, $headers);
    foreach ($entries as $entry) {
        $rowData = [];
        // Columns are emitted in header order.
        foreach ($headers as $column) {
            $rowData[] = $entry[$column];
        }
        fputcsv($csvFile, $rowData);
    }

    fclose($csvFile);
}
/**
 * Reads every row of a table page by page and groups the rows by id prefix.
 *
 * Rows with a purely numeric id are grouped by the first two characters that
 * follow the first dot of their filename and keyed by filename; all other
 * rows are grouped by the first two characters of the id and keyed by
 * "<client_id>-<id>". One cassandra_<group>.csv file is written per group.
 *
 * NOTE(review): numeric-id rows keep the numeric id in the "id" column while
 * physical CSV files use the file name there - confirm that this is intended.
 *
 * @param string $tableName The name of the database table.
 * @return array Entries indexed by group key, then by entry key.
 * @throws InvalidArgumentException If $tableName is not a plain identifier.
 */
private function getDbEntries($tableName)
{
    // The table name is interpolated into the CQL text, so restrict its shape.
    if (!preg_match('/^[A-Za-z_][A-Za-z0-9_.]*$/', (string) $tableName)) {
        throw new InvalidArgumentException("Invalid table name: {$tableName}");
    }

    $columns = 'client_id, id, size, filename, created_on';
    if ($this->schema_version() !== 1) {
        $columns .= ', bucket';
    }
    $query = "SELECT {$columns} FROM {$tableName}";

    $result = $this->cassandra->execute(
        $query,
        [
            'arguments' => [],
            'page_size' => static::DEFAULT_PAGE_SIZE,
        ]
    );

    $entries = [];
    while ($result) {
        foreach ($result as $row) {
            $rawId = (string) $row['id'];
            if (preg_match('/^[0-9]+$/', $rawId)) {
                $dotParts = explode('.', (string) $row['filename'], 2);
                // A filename without a dot has no suffix to derive the group from.
                $groupKey = isset($dotParts[1]) ? (string) substr($dotParts[1], 0, 2) : '';
                $entryKey = $row['filename'];
                $entryId = $row['id'];
            } else {
                $groupKey = (string) substr($rawId, 0, 2);
                $entryId = $row['client_id'] . '-' . $row['id'];
                $entryKey = $entryId;
            }
            $entries[$groupKey][$entryKey] = $this->buildDbEntry($row, $entryId);
        }
        $result = $result->nextPage();
    }

    // One CSV file per group of database entries.
    foreach ($entries as $groupKey => $groupEntries) {
        $this->createDBFileCSV($groupKey, $groupEntries);
    }

    return $entries;
}

/**
 * Converts one result row into the entry layout written to cassandra_*.csv.
 *
 * @param array $row     Result row as yielded by the driver.
 * @param mixed $entryId Value stored in the "id" column of the entry.
 * @return array
 */
private function buildDbEntry($row, $entryId)
{
    $createdOn = $row['created_on'];
    if (is_object($createdOn)) {
        // Objects cannot be cast to int directly. Presumably this is the
        // driver's timestamp value object, whose time() yields seconds - confirm.
        if (method_exists($createdOn, 'time')) {
            $createdOn = $createdOn->time();
        } elseif (method_exists($createdOn, '__toString')) {
            $createdOn = (string) $createdOn;
        } else {
            $createdOn = 0;
        }
    }

    return [
        'id' => $entryId,
        'size' => (string) $row['size'],
        'creation_time' => date('Y-m-d H:i:s', (int) $createdOn),
        'filename' => $row['filename'],
        // Schema version 1 does not select the bucket column.
        'bucket' => isset($row['bucket']) ? (string) $row['bucket'] : '',
        'client_id' => (string) $row['client_id'],
        'attachment_id' => (string) $row['id'],
    ];
}
/**
 * Compares a file entry with its counterpart in the Cassandra association.
 *
 * The first two values of each side are compared after surrounding double
 * quotes are stripped; a missing value is treated as an empty string.
 *
 * @param string $id                   The ID of the file entry.
 * @param array  $data                 Values of the file entry.
 * @param array  $cassandra_file_assoc Cassandra values indexed by ID.
 * @return array|null The mismatch ('id', 'file1' and, when a counterpart
 *                    exists, 'file2'), or null if the entries match.
 */
private function compareFileEntries($id, $data, $cassandra_file_assoc)
{
    $unquote = static function ($values, $index) {
        return isset($values[$index]) ? trim((string) $values[$index], '"') : '';
    };

    $physical = [$unquote($data, 0), $unquote($data, 1)];
    if (!isset($cassandra_file_assoc[$id])) {
        return [
            'id' => $id,
            'file1' => [$id, $physical[0], $physical[1]],
        ];
    }

    $stored = [
        $unquote($cassandra_file_assoc[$id], 0),
        $unquote($cassandra_file_assoc[$id], 1),
    ];
    if ($physical === $stored) {
        return null;
    }

    return [
        'id' => $id,
        'file1' => [$id, $physical[0], $physical[1]],
        'file2' => [$id, $stored[0], $stored[1]],
    ];
}
/**
 * Reads a file and returns its content split on "\n".
 *
 * @param string $file The path to the file.
 * @return array The lines of the file, or an empty array if it cannot be read.
 */
private function getFileLines($file): array
{
    $contents = file_get_contents($file);
    if ($contents === false) {
        return [];
    }

    return explode("\n", $contents);
}
/**
 * Drops empty entries and reduces each remaining entry to its first element.
 *
 * Only the first element of each entry is kept; any further elements are
 * discarded.
 *
 * @param array $entries Entries to be filtered and mapped.
 * @return array A re-indexed list holding the first element of every non-empty entry.
 */
private function filterAndMapEntries($entries): array
{
    $firstElements = [];
    foreach ($entries as $entry) {
        if (!$entry) {
            // Same rule as array_filter() without a callback.
            continue;
        }
        // reset() also copes with arrays whose keys do not start at 0.
        $firstElements[] = is_array($entry) ? reset($entry) : ($entry[0] ?? null);
    }

    return $firstElements;
}
/**
 * Compares the physical_*.csv and cassandra_*.csv files of a directory and
 * writes the CSV and HTML reports.
 *
 * For every group that has both files, all mismatches of both comparison
 * directions are collected. If one side of a group is missing entirely,
 * every row of the other side is reported.
 *
 * @param string $dir The directory path containing the files to be processed.
 * @return void
 */
private function process_files_in_directory($dir): void
{
    $physical_files = [];
    $cassandra_files = [];
    $csvFiles = glob($dir . '/*.csv');
    foreach (($csvFiles === false ? [] : $csvFiles) as $file) {
        $nameParts = explode('_', basename($file, '.csv'), 2);
        if (count($nameParts) !== 2) {
            continue;
        }
        list($csv_type, $file_num) = $nameParts;
        if ($csv_type === 'physical') {
            $physical_files[$file_num] = $file;
        } elseif ($csv_type === 'cassandra') {
            $cassandra_files[$file_num] = $file;
        }
    }
    ksort($physical_files);
    ksort($cassandra_files);

    $cassandra_entries = [];         // physical rows that differ from or lack a database row
    $physical_entries = [];          // database rows that differ from or lack a physical row
    $missing_cassandra_entries = []; // physical rows of groups without a cassandra CSV
    $missing_physical_files = [];    // database rows of groups without a physical CSV

    foreach ($physical_files as $file_num => $physical_file) {
        $cassandra_file = $dir . '/cassandra_' . $file_num . '.csv';
        if (file_exists($cassandra_file)) {
            // Keep every mismatch, not only the first one per group.
            foreach ($this->compare_csv_files($physical_file, $cassandra_file) as $mismatch) {
                $cassandra_entries[] = $mismatch;
            }
            foreach ($this->compare_csv_files($cassandra_file, $physical_file) as $mismatch) {
                $physical_entries[] = $mismatch;
            }
            continue;
        }

        foreach ($this->readCsvRecords($physical_file) as $record) {
            if (!isset($record['id'])) {
                continue;
            }
            $missing_cassandra_entries[] = [
                'id' => $record['id'],
                'file1' => [
                    $record['id'],
                    $record['size'] ?? '',
                    $record['creation_time'] ?? '',
                ],
            ];
        }
    }

    foreach ($cassandra_files as $file_num => $cassandra_file) {
        if (file_exists($dir . '/physical_' . $file_num . '.csv')) {
            continue;
        }

        foreach ($this->readCsvRecords($cassandra_file) as $record) {
            if (!isset($record['id'])) {
                continue;
            }
            // Same column order as compare_csv_files(): filename, client_id,
            // bucket, attachment_id. The physical-files report reads them by position.
            $missing_physical_files[] = [
                'id' => $record['id'],
                'file1' => [
                    $record['id'],
                    $record['size'] ?? '',
                    $record['creation_time'] ?? '',
                    $record['filename'] ?? '',
                    $record['client_id'] ?? '',
                    $record['bucket'] ?? '',
                    $record['attachment_id'] ?? '',
                ],
            ];
        }
    }

    $result_from_cassandra_entries = array_unique(
        array_merge($cassandra_entries, $missing_cassandra_entries),
        SORT_REGULAR
    );
    $result_from_physical_files = array_unique(
        array_merge($physical_entries, $missing_physical_files),
        SORT_REGULAR
    );

    $this->generateCsvReportForDbEntries($result_from_cassandra_entries, 'result_from_cassandra_entries.csv');
    $this->generateHtmlReport($result_from_cassandra_entries, 'cassandra.html');
    $this->generateCsvReportForPhysicalFiles($result_from_physical_files, 'result_from_physical_files.csv');
    $this->generateHtmlReport($result_from_physical_files, 'physical.html');
}

/**
 * Reads a CSV file into a list of rows keyed by the header names.
 *
 * Rows whose column count differs from the header (blank or malformed
 * lines) are skipped.
 *
 * @param string $path The CSV file to read.
 * @return array List of associative rows; empty if the file cannot be read.
 */
private function readCsvRecords($path)
{
    $handle = fopen($path, 'r');
    if ($handle === false) {
        return [];
    }

    $records = [];
    $header = fgetcsv($handle);
    if (is_array($header)) {
        $width = count($header);
        while (($row = fgetcsv($handle)) !== false) {
            if (count($row) !== $width) {
                continue;
            }
            $records[] = array_combine($header, $row);
        }
    }
    fclose($handle);

    return $records;
}
/**
 * Builds an associative array from the lines of a CSV file.
 *
 * The first line is treated as the header and skipped. Every other line is
 * parsed as CSV (so quoted values lose their quotes) and, if it has at least
 * three columns, indexed by its first column with up to six following
 * columns as the value.
 *
 * @param array $file_lines An array containing lines of a file.
 * @return array Values indexed by the first column of each line.
 */
private function buildFileAssociation($file_lines): array
{
    $file_assoc = [];
    $isHeader = true;
    foreach ($file_lines as $line) {
        if ($isHeader) {
            $isHeader = false;
            continue;
        }

        $values = str_getcsv(rtrim((string) $line, "\r\n"));
        if (count($values) < 3) {
            continue;
        }
        $file_assoc[$values[0]] = array_slice($values, 1, 6);
    }

    return $file_assoc;
}
/**
 * Compares two CSV files by their "id" column.
 *
 * Returns the rows of the first file that have no counterpart in the second
 * file or whose size differs. Row values are listed as
 * [id, size, creation_time, filename, client_id, bucket, attachment_id] for
 * the first file and [id, size, creation_time, filename] for the second;
 * columns a file does not have are reported as null.
 *
 * @param string $file1_path The file path of the first CSV file.
 * @param string $file2_path The file path of the second CSV file.
 * @return array List of ['id' => ..., 'file1' => [...]] entries, with an
 *               additional 'file2' => [...] for size mismatches.
 */
private function compare_csv_files($file1_path, $file2_path): array
{
    $load = static function ($path, array $extraColumns) {
        $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false || count($lines) === 0) {
            return [];
        }

        $rows = array_map('str_getcsv', $lines);
        $headers = array_shift($rows);

        $indexes = [];
        foreach (array_merge(['id', 'size', 'creation_time'], $extraColumns) as $column) {
            // false marks a column the file does not have; it must never be
            // used as an offset, because it would silently address column 0.
            $indexes[$column] = array_search($column, $headers, true);
        }
        if ($indexes['id'] === false) {
            return [];
        }

        $records = [];
        foreach ($rows as $row) {
            if (!isset($row[$indexes['id']])) {
                continue;
            }
            $values = [];
            foreach ($indexes as $index) {
                $values[] = ($index !== false && isset($row[$index])) ? $row[$index] : null;
            }
            $records[$row[$indexes['id']]] = $values;
        }

        return $records;
    };

    $file1_rows = $load($file1_path, ['filename', 'client_id', 'bucket', 'attachment_id']);
    $file2_rows = $load($file2_path, ['filename']);

    $missing_entries = [];
    foreach ($file1_rows as $id => $values) {
        if (!isset($file2_rows[$id])) {
            $missing_entries[] = [
                'id' => $id,
                'file1' => $values,
            ];
            continue;
        }

        $counterpart = $file2_rows[$id];
        // Ids are equal by construction, so only the size can differ here.
        if ($values[1] !== $counterpart[1]) {
            $missing_entries[] = [
                'id' => $id,
                'file1' => $values,
                'file2' => $counterpart,
            ];
        }
    }

    return $missing_entries;
}
/**
 * Generates the CSV report of entries that lack a matching physical file.
 *
 * Each row's 'file1' list is read by position:
 * [id, size, creation_time, filename, client_id, bucket, attachment_id].
 * For numeric ids the filename is used as path and name when it is present.
 *
 * @param array  $inconsistentFiles An array of inconsistent entries.
 * @param string $name              The file name to use for the report.
 * @return void
 * @throws RuntimeException If the report cannot be opened for writing.
 */
private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void
{
    $fp = fopen($name, 'w');
    if ($fp === false) {
        throw new RuntimeException("Unable to open {$name} for writing.");
    }

    fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket', 'Id']);

    foreach ($inconsistentFiles as $row) {
        $file1 = (isset($row['file1']) && is_array($row['file1'])) ? $row['file1'] : [];
        $id = $row['id'] ?? '';

        $check_value = $id;
        $filePath = $id ? $id : ($row['file2'][0] ?? '');
        if (is_numeric($id) && isset($file1[3])) {
            $filePath = $file1[3];
            $check_value = $file1[3];
        }

        // Fall back to the file system only when the path really is a file.
        $onDisk = is_string($filePath) && is_file($filePath);
        $size = isset($file1[1]) ? (string) $file1[1] : ($onDisk ? filesize($filePath) : '');
        $creationTime = isset($file1[2])
            ? (string) $file1[2]
            : ($onDisk ? date('Y-m-d H:i:s', filectime($filePath)) : '');

        fputcsv($fp, [
            'Attachment',
            $filePath,
            $check_value,
            $row['thumb1'] ?? '',
            $row['thumb2'] ?? '',
            $size,
            $creationTime,
            $file1[4] ?? '',
            $file1[5] ?? '',
            // Guard the index that is actually read (was: isset [5], read [6]).
            $file1[6] ?? '',
        ]);
    }

    fclose($fp);
    chmod($name, 0666);
}
/**
 * Generates the CSV report of physical files that lack a matching database entry.
 *
 * The entry name is taken from 'file1' and falls back to 'file2'; the entry
 * path is the attachment directory joined with that name. Thumbnail names
 * are derived from the entry name, except for ids containing a dot.
 *
 * @param array  $inconsistentFiles An array containing inconsistent file data.
 * @param string $name              The name of the CSV report file to be generated.
 * @return void
 * @throws RuntimeException If the report cannot be opened for writing.
 */
private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void
{
    $fp = fopen($name, 'w');
    if ($fp === false) {
        throw new RuntimeException("Unable to open {$name} for writing.");
    }

    fputcsv($fp, ['File/Attachment', 'Entry Path', 'Entry Name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time']);

    foreach ($inconsistentFiles as $row) {
        $file1 = (isset($row['file1']) && is_array($row['file1'])) ? $row['file1'] : [];

        // Choose the name first and concatenate afterwards: "." binds tighter
        // than "?:", which made the old fallback to file2 unreachable.
        $entryName = $file1[0] ?? $row['file2'][0] ?? '';
        $filePath = $this->directory . '/' . $entryName;

        $onDisk = is_file($filePath);
        $size = isset($file1[1]) ? (string) $file1[1] : ($onDisk ? filesize($filePath) : '');
        $creationTime = isset($file1[2])
            ? (string) $file1[2]
            : ($onDisk ? date('Y-m-d H:i:s', filectime($filePath)) : '');

        $thumb1 = $entryName . '-thumb1';
        $thumb2 = $entryName . '-thumb2';
        if (isset($row['id']) && is_string($row['id']) && strpos($row['id'], '.') !== false) {
            // No thumbnail names are reported for ids that contain a dot.
            $thumb1 = '';
            $thumb2 = '';
        }

        fputcsv($fp, [
            'File',
            $filePath,
            $entryName,
            $thumb1,
            $thumb2,
            $size,
            $creationTime,
        ]);
    }

    fclose($fp);
    chmod($name, 0666);
}
/**
 * Generates an HTML report of inconsistent files.
 *
 * One table row is written per entry, using the first three values of the
 * entry's 'file1' list (name, size, creation time). All values are
 * HTML-escaped.
 *
 * @param array  $inconsistentFiles An array of inconsistent files.
 * @param string $name              The file name to use for the report.
 * @return void
 * @throws Exception If the report cannot be opened for writing.
 */
private function generateHtmlReport(array $inconsistentFiles, string $name): void
{
    $file = fopen($name, 'w');
    if (!$file) {
        throw new Exception('Failed to open the file for writing.');
    }

    $escape = static function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    $html = '<html><head><style>
table {
  font-family: arial, sans-serif;
  border-collapse: collapse;
  width: 100%;
}
td, th {
  border: 1px solid #dddddd;
  text-align: left;
  padding: 8px;
}
tr:nth-child(even) {
  background-color: #dddddd;
}
</style></head><body><table><thead><tr><th>File/Attachment</th><th>File path</th><th>File name</th><th>Thumb 1</th><th>Thumb 2</th><th>Size</th><th>Creation Time</th></tr></thead><tbody>';

    foreach ($inconsistentFiles as $row) {
        $file1 = (isset($row['file1']) && is_array($row['file1'])) ? $row['file1'] : [];
        $entryName = (string) ($file1[0] ?? '');
        $filePath = $this->directory . '/' . $entryName;

        $onDisk = is_file($filePath);
        $size = isset($file1[1]) ? (string) $file1[1] : ($onDisk ? filesize($filePath) : '');
        $creationTime = isset($file1[2])
            ? (string) $file1[2]
            : ($onDisk ? date('Y-m-d H:i:s', filectime($filePath)) : '');

        $cells = [
            'Attachment',
            $filePath,
            $entryName,
            $row['thumb1'] ?? '',
            $row['thumb2'] ?? '',
            $size,
            $creationTime,
        ];
        $html .= '<tr><td>' . implode('</td><td>', array_map($escape, $cells)) . '</td></tr>';
    }

    $html .= '</tbody></table></body></html>';

    fwrite($file, $html);
    fclose($file);
}
/**
 * Writes the rows of the second CSV file that have an identical entry
 * (same id, size and creation time) in the first CSV file.
 *
 * The first file must have the columns id, size and creation_time. In the
 * second file the id is taken from "Entry Path" (or "id"), the size from
 * "size" or "Size" and the time from "creation_time" or "Creation Time", so
 * both the intermediate CSV layout and the report layout are accepted.
 *
 * @param string $firstFile  The path to the first CSV file.
 * @param string $secondFile The path to the second CSV file.
 * @param string $finalFile  The path to the final CSV file to be created.
 * @return void
 * @throws RuntimeException If a file cannot be read or written, or a
 *                          required column is missing.
 */
public function compareCSVFilesTransform($firstFile, $secondFile, $finalFile): void
{
    $readCsv = static function ($path) {
        $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false || count($lines) === 0) {
            throw new RuntimeException("Unable to read CSV data from {$path}.");
        }
        $rows = array_map('str_getcsv', $lines);
        $headers = array_shift($rows);

        return [$headers, $rows];
    };

    // A missing header must fail loudly: array_search() returns false, which
    // as an array offset silently addresses column 0.
    $findColumn = static function (array $headers, array $candidates, $path) {
        foreach ($candidates as $candidate) {
            $index = array_search($candidate, $headers, true);
            if ($index !== false) {
                return $index;
            }
        }
        throw new RuntimeException(
            "Column '" . implode("' or '", $candidates) . "' not found in {$path}."
        );
    };

    list($firstHeaders, $firstData) = $readCsv($firstFile);
    list($secondHeaders, $secondData) = $readCsv($secondFile);

    $firstIdIndex = $findColumn($firstHeaders, ['id'], $firstFile);
    $firstSizeIndex = $findColumn($firstHeaders, ['size'], $firstFile);
    $firstCreationTimeIndex = $findColumn($firstHeaders, ['creation_time'], $firstFile);

    $secondIdIndex = $findColumn($secondHeaders, ['Entry Path', 'id'], $secondFile);
    $secondSizeIndex = $findColumn($secondHeaders, ['size', 'Size'], $secondFile);
    $secondCreationTimeIndex = $findColumn($secondHeaders, ['creation_time', 'Creation Time'], $secondFile);

    // Index the first file by id.
    $firstEntries = [];
    foreach ($firstData as $row) {
        if (!isset($row[$firstIdIndex], $row[$firstSizeIndex], $row[$firstCreationTimeIndex])) {
            continue;
        }
        $firstEntries[$row[$firstIdIndex]] = [
            'size' => $row[$firstSizeIndex],
            'creation_time' => $row[$firstCreationTimeIndex],
        ];
    }

    $finalCsvFile = fopen($finalFile, 'w');
    if ($finalCsvFile === false) {
        throw new RuntimeException("Unable to open {$finalFile} for writing.");
    }
    fputcsv($finalCsvFile, ['id', 'size', 'creation_time']);

    // Keep the rows of the second file that match the first file exactly.
    foreach ($secondData as $row) {
        if (!isset($row[$secondIdIndex], $row[$secondSizeIndex], $row[$secondCreationTimeIndex])) {
            continue;
        }
        $secondId = $row[$secondIdIndex];
        $secondSize = $row[$secondSizeIndex];
        $secondCreationTime = $row[$secondCreationTimeIndex];

        if (
            isset($firstEntries[$secondId]) &&
            $firstEntries[$secondId]['size'] === $secondSize &&
            $firstEntries[$secondId]['creation_time'] === $secondCreationTime
        ) {
            fputcsv($finalCsvFile, [$secondId, $secondSize, $secondCreationTime]);
        }
    }

    fclose($finalCsvFile);
}
/**
 * Deletes the physical files listed in a CSV report after confirmation.
 *
 * The second column of every data row names the file. When an attachment
 * directory is configured, the file name part of that value is resolved
 * inside the directory (report paths may already carry the directory
 * prefix); otherwise the value is used as recorded. Every deletion is
 * appended to deleted_files.log.
 *
 * @param string $csvFile The path to the CSV file containing the list of files to delete.
 * @return void
 */
private function deletePhysicalFilesFromCsv(string $csvFile): void
{
    $handle = fopen($csvFile, 'r');
    if ($handle === false) {
        echo "Unable to open CSV report: $csvFile" . PHP_EOL;
        return;
    }

    $baseDir = (string) $this->directory;
    $entries = [];
    // Collect the file paths to be deleted.
    while (($data = fgetcsv($handle)) !== false) {
        // Skip blank lines, short rows and the header row.
        if (!isset($data[1]) || $data[1] === '' || $data[0] === 'File/Attachment') {
            continue;
        }
        $entries[] = $baseDir !== ''
            ? rtrim($baseDir, '/\\') . '/' . basename($data[1])
            : $data[1];
    }
    fclose($handle);

    // Prompt the user for confirmation before deleting the files.
    echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL;
    echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL;
    echo "Do you want to delete the physical files listed in the CSV report? (yes/no): ";
    $confirmation = trim((string) fgets(STDIN));

    if (strtolower($confirmation) !== 'yes') {
        echo "Deletion of physical files aborted." . PHP_EOL;
        return;
    }

    // Every deletion is recorded; without a log nothing is deleted.
    $logFile = 'deleted_files.log';
    $logHandle = fopen($logFile, 'a');
    if ($logHandle === false) {
        echo "Unable to open log file '$logFile'. Deletion of physical files aborted." . PHP_EOL;
        return;
    }

    foreach ($entries as $filePath) {
        if (!file_exists($filePath)) {
            echo "File not found: $filePath" . PHP_EOL;
            continue;
        }
        // The unlink() call used to be commented out while the messages below
        // still claimed success; report only what actually happened.
        if (unlink($filePath)) {
            echo "File deleted: $filePath" . PHP_EOL;
            fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL);
        } else {
            echo "Unable to delete file: $filePath" . PHP_EOL;
        }
    }

    fclose($logHandle);
    echo "Deletion completed. The list of deleted files is saved in '$logFile'." . PHP_EOL;
}
/**
 * Returns the Cassandra schema version currently selected for this run.
 *
 * @return int
 */
private function schema_version(): int
{
    return (int) static::$schemaVersion;
}
/**
 * Returns the attachment_file_info row for an attachment.
 *
 * For schema version 1 the row is looked up by id and client id; for any
 * other version the bucket is part of the lookup as well. All values are
 * bound as statement arguments.
 *
 * @param int         $clientId
 * @param string|null $bucketId Bucket of the attachment; only used when the
 *                              schema version is not 1 (null is bound as '').
 * @param string      $id
 *
 * @return object|null The row as an object with a string id and, if the
 *                     column is present, an integer size; null if no row
 *                     was found.
 */
private function get_info(int $clientId, ?string $bucketId, string $id): ?object
{
    // Arguments are listed in marker order so they bind correctly whether
    // the driver matches them by name or by position.
    $cql = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?';
    $args = [
        'id' => $id,
        'client_id' => $clientId,
    ];
    if ($this->schema_version() != 1) {
        // Bound as a parameter instead of being concatenated into the CQL text.
        $cql .= ' AND bucket = ?';
        $args['bucket'] = (string) $bucketId;
    }

    $query = $this->cassandra->prepare($cql);
    $res = $this->cassandra->execute(
        $query,
        [
            'arguments' => $args,
        ]
    );

    if (!$res || !$res->valid()) {
        return null;
    }

    $tmp = $res->current();
    $attachment = (object) $tmp;
    $attachment->id = (string) $tmp['id'];
    if (array_key_exists('size', $tmp)) {
        $attachment->size = (int) $tmp['size'];
    }

    return $attachment;
}
/**
 * Derives the 'bucket' partitioning value for an identifier.
 *
 * The bucket is the leading part of the id; its length in bytes is taken from
 * the static $bucketMagic property.
 *
 * @param string $id
 * @return string
 */
private function _set_bucket(string $id): string
{
    $prefixLength = static::$bucketMagic;

    return substr($id, 0, $prefixLength);
}
2023-05-19 19:01:57 +02:00
2023-05-19 11:58:21 +02:00
/**
 * Adjusts the attachment count and total size stored in attachment_stats
 * for the current client (static::$clientId).
 *
 * Two UPDATE statements are executed: one changes count by 1, the other changes
 * size by $size. Both go in the same direction, chosen by $add.
 *
 * @param bool $add  true to increase the statistics, false to decrease them
 * @param int  $size amount by which the size column is changed
 *
 * @return void
 */
private function _update_attachment_stats(bool $add, int $size): void
{
    $op = $add ? '+' : '-';
    // Normalise the client id once so both statements bind the same integer value.
    $arguments = ['client_id' => (int) static::$clientId];

    $query = $this->cassandra->prepare(
        'UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?'
    );
    $this->cassandra->execute($query, ['arguments' => $arguments]);

    // $size is embedded in the statement text; the int type declaration
    // guarantees it is a plain integer.
    $query = $this->cassandra->prepare(
        'UPDATE attachment_stats SET size = size ' . $op . ' ' . $size . ' where client_id = ?'
    );
    $this->cassandra->execute($query, ['arguments' => $arguments]);
}
/**
 * Fetches the attachment_ids row for an attachment id.
 *
 * The row is looked up by id, by the bucket derived from the id and by the
 * current client (static::$clientId).
 *
 * @param string $id
 *
 * @return object|null the first matching row cast to an object, or null when there is none
 */
private function _get_attachment_key(string $id): ?object
{
    $statement = $this->cassandra->prepare(
        'SELECT * from attachment_ids where id = ? AND bucket = ? AND client_id = ?'
    );
    $rows = $this->cassandra->execute($statement, [
        'arguments' => [
            'client_id' => static::$clientId,
            'bucket' => $this->_set_bucket($id),
            'id' => $id,
        ],
    ]);

    if (!$rows || !$rows->valid()) {
        return null;
    }

    return (object) $rows->current();
}
/**
 * Loads a single row from the attachments table for the given attachment id.
 *
 * The attachment_ids entry is resolved first to obtain project_id, entity_type
 * and (for schema versions other than 1) entity_id for the lookup.
 * A Cassandra InvalidArgumentException raised along the way is swallowed and
 * results in null.
 *
 * @param string $attachmentId
 *
 * @return object|null the first row passed through _convert_to_object(), or null
 *                     when no key or no row was found
 */
private function _get_attachment_by_id(string $attachmentId): ?object
{
    $rows = null;

    try {
        $columns = '*';
        echo " attid: " . $attachmentId . PHP_EOL;
        $key = $this->_get_attachment_key($attachmentId);

        if ($key) {
            $isSchemaOne = $this->schema_version() === 1;

            $cql = 'SELECT ' . $columns . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?';
            if (!$isSchemaOne) {
                $cql .= ' AND entity_id = ?';
            }
            $statement = $this->cassandra->prepare($cql);

            if ($isSchemaOne) {
                $bindings = [
                    'client_id' => static::$clientId,
                    'id' => $key->id,
                    'project_id' => $key->project_id,
                    'entity_type' => $key->entity_type,
                ];
            } else {
                $bindings = [
                    'client_id' => static::$clientId,
                    'id' => $key->id,
                    'project_id' => $key->project_id,
                    'entity_id' => $key->entity_id,
                    'entity_type' => $key->entity_type,
                ];
            }

            $rows = $this->cassandra->execute($statement, ['arguments' => $bindings]);
        }
    } catch (Cassandra\Exception\InvalidArgumentException $e) {
        // Ignored: $rows stays null and the method returns null below.
    }

    if ($rows == null || !$rows->valid()) {
        return null;
    }

    return $this->_convert_to_object($rows->current());
}
/**
 * Removes the attachment_ids entry of an attachment.
 *
 * The row is addressed by id, by the bucket derived from the id and by the
 * current client (static::$clientId). The statement is sent with executeAsync()
 * and the returned value is not inspected.
 *
 * @param string $id
 */
private function _delete_attachment_key(string $id): void
{
    $statement = $this->cassandra->prepare(
        'DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?'
    );
    $bindings = [
        'client_id' => static::$clientId,
        'bucket' => $this->_set_bucket($id),
        'id' => $id,
    ];

    $this->cassandra->executeAsync($statement, ['arguments' => $bindings]);
}
/**
 * Updates the reference bookkeeping of an attachment data file.
 *
 * The ref_count in attachment_file_refs is always changed by one. When an
 * attachment id is supplied, the matching attachment_file_ids row is inserted
 * ($add = true) or deleted ($add = false) as well. Both statements are sent
 * with executeAsync().
 *
 * @param string $data_id      id of the data file
 * @param bool   $add          true to add a reference, false to remove one
 * @param string $attachmentId attachment referencing the file; skipped when empty()
 *
 * @return void
 */
private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void
{
    $bindings = [
        'client_id' => static::$clientId,
        'bucket' => $this->_set_bucket($data_id),
        'id' => $data_id,
    ];
    $sign = $add ? '+' : '-';

    $counterUpdate = $this->cassandra->prepare(
        'UPDATE attachment_file_refs SET ref_count = ref_count ' . $sign . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?'
    );
    $this->cassandra->executeAsync($counterUpdate, ['arguments' => $bindings]);

    if (empty($attachmentId)) {
        return;
    }

    $bindings['attachment_id'] = $attachmentId;
    $cql = $add
        ? 'INSERT INTO attachment_file_ids (client_id,bucket,id,attachment_id) VALUES(?,?,?,?)'
        : 'DELETE FROM attachment_file_ids WHERE client_id = ? AND bucket = ? AND attachment_id = ? AND id = ?';

    $idStatement = $this->cassandra->prepare($cql);
    $this->cassandra->executeAsync($idStatement, ['arguments' => $bindings]);
}
/**
 * Deletes an attachment file and the rows that reference it from Cassandra.
 *
 * Steps, in order:
 *  1. remember $clientId in static::$clientId (the private helpers read it),
 *  2. load the attachment_file_info row; return false when there is none,
 *  3. decrease the attachment statistics by the stored size,
 *  4. delete the attachment_file_info and attachment_file_refs rows,
 *  5. for every attachment_id listed in attachment_file_ids, delete its
 *     attachment_ids entry and its attachments row,
 *  6. delete the attachment_file_ids rows,
 *  7. if an attachments row can be resolved for $id itself, delete it together
 *     with its file reference and attachment_ids entry.
 *
 * @param int         $clientId
 * @param string|null $bucketId bucket of the attachment_file_info row; only used with schema version 2
 * @param string      $id
 *
 * @return bool true only when step 7 found and deleted an attachments row;
 *              false otherwise, including when no file info exists
 */
public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool
{
    static::$clientId = $clientId;

    $refData = [
        'client_id' => $clientId,
        'id' => $id,
    ];

    $fileInfo = $this->get_info($clientId, $bucketId, $id);
    if (!$fileInfo) {
        return false;
    }
    // get_info() only sets size when the row has one; fall back to 0 so the
    // int-typed statistics update does not fail on a missing property.
    $this->_update_attachment_stats(false, (int) ($fileInfo->size ?? 0));

    $q = 'DELETE FROM attachment_file_info WHERE id = ? AND client_id = ? ';
    $fileInfoArgs = $refData;
    if ($this->schema_version() === 2) {
        // Bind the bucket instead of concatenating it into the statement text.
        $q .= ' AND bucket = ?';
        $fileInfoArgs['bucket'] = (string) $bucketId;
    }
    $query = $this->cassandra->prepare($q);
    $this->cassandra->execute($query, ['arguments' => $fileInfoArgs]);

    echo " DEL REFS \n ";

    // The reference tables are keyed by the bucket derived from the id, not by $bucketId.
    $refData['bucket'] = $this->_set_bucket($id);
    $query = $this->cassandra->prepare(
        'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?'
    );
    $this->cassandra->execute($query, ['arguments' => $refData]);

    // Get all attachments with the deleted file and remove them.
    $query = $this->cassandra->prepare(
        'SELECT attachment_id FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
    );
    $result = $this->cassandra->execute($query, ['arguments' => $refData]);

    echo " DEL ATTACHMENTS \n ";

    if ($this->schema_version() === 1) {
        $delQuery = $this->cassandra->prepare(
            'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?'
        );
    } else {
        $delQuery = $this->cassandra->prepare(
            'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? AND entity_id = ?'
        );
    }

    while ($result && $result->valid()) {
        $attachmentId = $result->current()['attachment_id'];

        $key = $this->_get_attachment_key((string) $attachmentId);
        if ($key == null) {
            $result->next();
            continue;
        }

        if ($this->schema_version() === 1) {
            // The schema 1 DELETE does not filter on entity_id, so the key row
            // already provides everything that has to be bound.
            $delArgs = [
                'client_id' => static::$clientId,
                'project_id' => $key->project_id,
                'entity_type' => $key->entity_type,
                'id' => $attachmentId,
            ];
        } else {
            $delArgs = [
                'client_id' => static::$clientId,
                'project_id' => $key->project_id,
                'entity_type' => $key->entity_type,
                'entity_id' => $key->entity_id,
                'id' => $attachmentId,
            ];
        }

        $this->_delete_attachment_key((string) $attachmentId);
        $this->cassandra->execute($delQuery, ['arguments' => $delArgs]);
        $result->next();
    }

    $query = $this->cassandra->prepare(
        'DELETE FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
    );
    $this->cassandra->execute($query, ['arguments' => $refData]);

    $result = false;
    $attachment = $this->_get_attachment_by_id($id);
    if ($attachment) {
        if ($this->schema_version() === 1) {
            $query = $this->cassandra->prepare(" DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_type = ? AND client_id = ? ");
            $arguments = [
                'arguments' => [
                    'client_id' => static::$clientId,
                    'id' => $id,
                    'project_id' => $attachment->project_id,
                    'entity_type' => $attachment->entity_type,
                ],
            ];
        } else {
            $query = $this->cassandra->prepare(" DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_id = ? AND entity_type = ? AND client_id = ? ");
            $arguments = [
                'arguments' => [
                    'client_id' => static::$clientId,
                    'id' => $id,
                    'project_id' => $attachment->project_id,
                    'entity_type' => $attachment->entity_type,
                    'entity_id' => $attachment->entity_id,
                ],
            ];
        }

        $queryResult = $this->cassandra->execute($query, $arguments) != null;
        if ($queryResult) {
            $result = true;
            $this->_update_file_refs($attachment->data_id, false, $attachment->id);
            $this->_delete_attachment_key($id);
        }
    }

    return $result;
}
/**
 * Deletes the attachments or files listed in a CSV report after the user
 * confirmed the operation on STDIN.
 *
 * The first line of the file is treated as a header and skipped, as is any
 * line whose first field is 'File/Attachment'. Lines with fewer than 7 fields
 * are ignored. Field positions:
 *
 *   0 source ('Attachment' or 'File'), 1 path, 2 name, 3 thumb1, 4 thumb2,
 *   5 size, 6 created, 7 client id, 8 bucket, 9 id
 *
 * Fields 7-9 are required for 'Attachment' rows only. Which rows are acted on
 * depends on $src:
 *  - 'cassandra': 'Attachment' rows are passed to deleteAttachment(),
 *  - 'file': for 'File' rows the file at field 1 and the two thumbnails located
 *    in the same directory are removed from disk.
 * Every performed deletion is appended to deleted_files.log.
 *
 * @param string $file path of the CSV report
 * @param string $src  ('cassandra' / 'file')
 *
 * @return void
 *
 * example rows:
 *
 * Attachment , /test1/testx , testx , testx-thumb1 , testx-thumb2 , 4343 , 20-02-22 13:30 , 1 , a , abc-def
 * File , /tmp/testx3f , testx3f , testx3f-thumb1 , testx3f-thumb2 , 4343 , 20-02-22 13:30
 */
public function processAttachmentDeletionCSV(string $file, string $src): void
{
    echo " Before proceeding with the deletion, make sure you have a backup of your data. " . PHP_EOL;
    echo " You can revert back to the backup in case of accidental data loss. " . PHP_EOL;
    echo " Do you want to delete the physical files listed in the CSV report? (yes/no): ";
    $answer = fgets(STDIN);
    // fgets() returns false on EOF; anything but an explicit "yes" aborts.
    if ($answer === false || strtolower(trim($answer)) !== 'yes') {
        return;
    }

    $file_contents = file_get_contents($file);
    if ($file_contents === false) {
        echo 'Unable to read CSV file: ' . $file . PHP_EOL;
        return;
    }

    $logFile = 'deleted_files.log';
    $logHandle = fopen($logFile, 'a');
    if ($logHandle === false) {
        // Refuse to delete anything when the run cannot be recorded.
        echo 'Unable to open log file: ' . $logFile . PHP_EOL;
        return;
    }

    $file_lines = explode(" \n ", $file_contents);
    foreach ($file_lines as $index => $line) {
        // Skip the header by position, so that a later data line that happens
        // to equal the first line is still processed.
        if ($index === 0) {
            continue;
        }

        $values = explode(" , ", $line);
        if ($values[0] === 'File/Attachment') {
            continue;
        }
        if (count($values) < 7) {
            continue;
        }

        $source = $values[0];

        if ($source === 'Attachment' && $src === 'cassandra') {
            // Client id, bucket and id are mandatory for a Cassandra deletion.
            if (count($values) < 10) {
                echo 'Skipping attachment row without client id, bucket and id: ' . $line . PHP_EOL;
                continue;
            }
            $clientId = (int) $values[7];
            $bucket = $values[8];
            $id = $values[9];

            echo " will delete " . $clientId . " : " . $bucket . " : " . $id . PHP_EOL;
            $this->deleteAttachment($clientId, $bucket, $id);
            fwrite($logHandle, " Deleted attachment: $id " . PHP_EOL);
        } elseif ($source === 'File' && $src === 'file') {
            $filePath = $values[1];
            $path = dirname($filePath);

            // Remove each thumbnail at most once. is_file() also rejects the
            // directory that an empty thumbnail name would resolve to.
            foreach ([$values[3], $values[4]] as $thumbName) {
                $thumbPath = $path . '/' . $thumbName;
                if (is_file($thumbPath) && unlink($thumbPath)) {
                    fwrite($logHandle, " Deleted thumbnail: " . $thumbPath . PHP_EOL);
                }
            }

            if (file_exists($filePath)) {
                // Only report and log the file when it was actually removed.
                if (unlink($filePath)) {
                    echo " File deleted: $filePath " . PHP_EOL;
                    // Write the deleted file path to the log file.
                    fwrite($logHandle, " Deleted file: $filePath " . PHP_EOL);
                } else {
                    echo 'File could not be deleted: ' . $filePath . PHP_EOL;
                }
            } else {
                echo " File not found: $filePath " . PHP_EOL;
            }
        }
    }

    fclose($logHandle);
    echo " Deletion completed. The list of attachments is saved in ' $logFile '. " . PHP_EOL;
}
}
// Script entry point. Constructing the checker already processes the command
// line (the constructor calls runFromCommandLine() with $_SERVER['argv']);
// afterwards checkConsistency() is invoked explicitly.
// NOTE(review): checkConsistency() is defined outside this excerpt - confirm the
// meaning of its two arguments there.
$checker = new DataConsistencyChecker ();
2023-05-19 19:01:57 +02:00
$checker -> checkConsistency ( " attachment_file_info " , true );