2023-05-19 11:58:21 +02:00
< ? php
2023-05-29 08:23:39 +02:00
/**
* Class DataConsistencyChecker
*
* The DataConsistencyChecker class is responsible for checking the consistency of data .
*/
2023-05-19 11:58:21 +02:00
class DataConsistencyChecker
{
private $_cluster ;
private $session ;
private $cassandra ;
private $directory ;
2023-05-19 19:01:57 +02:00
private $structured_directory ;
private $retrived_csv ;
2023-05-29 08:23:39 +02:00
private static $clientId ;
2023-05-19 11:58:21 +02:00
private static $schemaVersion = 1 ;
2023-05-29 08:23:39 +02:00
private static $bucketMagic = 4 ;
private const DEFAULT_PAGE_SIZE = 30 ;
private const CASSANDRA_RESULT_CSV = 'result_from_cassandra_entries.csv' ;
private const CASSANDRA_HTML_REPORT = 'cassandra.html' ;
private const PHYSICAL_RESULT_CSV = 'result_from_physical_files.csv' ;
private const PHYSICAL_HTML_REPORT = 'physical.html' ;
private const CSV_COLUMN_FILE_ATTACHMENT = 'File/Attachment' ;
private const CSV_COLUMN_FILE_PATH = 'File path' ;
private const CSV_COLUMN_FILE_NAME = 'File name' ;
private const CSV_COLUMN_THUMB1 = 'Thumb 1' ;
private const CSV_COLUMN_THUMB2 = 'Thumb 2' ;
private const CSV_COLUMN_SIZE = 'Size' ;
private const CSV_COLUMN_CREATION_TIME = 'Creation Time' ;
private const CSV_COLUMN_CLIENT_ID = 'ClientId' ;
private const CSV_COLUMN_BUCKET = 'Bucket' ;
private const CSV_COLUMN_ID = 'Id' ;
private const CSV_EXTENSION = '.csv' ;
private const NUM_FIELDS = 3 ;
private const DELIMITER = '-' ;
2023-05-19 11:58:21 +02:00
/**
 * Creates the checker and immediately runs the command-line workflow.
 *
 * runFromCommandLine() assigns $this->directory from the parsed options, so
 * the $directory argument only serves as an initial value.
 *
 * @param string|null $directory Optional attachment directory.
 */
public function __construct($directory = null)
{
    if ($directory !== null) {
        $this->directory = $directory;
    }

    // $_SERVER['argv'] is not populated in every SAPI/configuration; fall back
    // to an empty list so the array-typed parameter never receives null.
    $this->runFromCommandLine($_SERVER['argv'] ?? []);
}
2023-05-19 19:01:57 +02:00
/**
 * Initializes the Cassandra connection based on the configuration settings.
 *
 * Reads the CASSANDRA section of config.ini (path is relative to the current
 * working directory), builds the cluster and connects to the configured
 * keyspace. A connection failure is reported on stdout and leaves
 * $this->cassandra unset.
 *
 * @return void
 * @throws \RuntimeException When config.ini cannot be parsed or a required
 *                           CASSANDRA setting is missing.
 */
public function init(): void
{
    $config = parse_ini_file('config.ini', true);
    if ($config === false || !isset($config['CASSANDRA']) || !is_array($config['CASSANDRA'])) {
        throw new \RuntimeException('Unable to read the CASSANDRA section from config.ini.');
    }

    $cassandraConfig = $config['CASSANDRA'];
    foreach (['host', 'port', 'user', 'password', 'keyspace'] as $requiredKey) {
        if (!isset($cassandraConfig[$requiredKey])) {
            throw new \RuntimeException(
                sprintf('Missing "%s" in the CASSANDRA section of config.ini.', $requiredKey)
            );
        }
    }

    $this->_cluster = Cassandra::cluster()
        ->withContactPoints($cassandraConfig['host'])
        // parse_ini_file() yields strings in its default scanner mode; the
        // port is handed to the driver as an integer.
        ->withPort((int) $cassandraConfig['port'])
        ->withCredentials(
            $cassandraConfig['user'],
            $cassandraConfig['password']
        )
        ->build();

    try {
        $this->cassandra = $this->_cluster->connect($cassandraConfig['keyspace']);
    } catch (Exception $e) {
        echo "An error occurred: " . $e->getMessage() . "\n";
    }
}
2023-05-19 19:01:57 +02:00
/**
 * Runs the script from the command line.
 *
 * Options are parsed with getopt(), which reads the process arguments itself;
 * $arguments is accepted for interface compatibility only. Every path through
 * this method ends the process with exit.
 *
 * Two modes are supported:
 *  - deletion: both --remove and --source are given;
 *  - consistency check: --directory and --version are given. The output
 *    directory holds temporary CSV files and is removed afterwards.
 *
 * @param array $arguments The command line arguments (unused, see above).
 * @return void
 */
public function runFromCommandLine(array $arguments): void
{
    $shortOptions = 'hd:v:o:r:s:';
    $longOptions = ['help', 'directory:', 'version:', 'v', 'output:', 'o', 'remove:', 'r', 'source:', 's'];
    $options = getopt($shortOptions, $longOptions);
    if (!is_array($options) || count($options) === 0 || isset($options['h']) || isset($options['help'])) {
        $this->displayHelpMessage();
        exit;
    }

    // getopt() yields false for value-less flags and an array for repeated
    // options; only a non-empty string counts as a supplied value.
    $readOption = function (string $long, string $short) use ($options) {
        $value = $options[$long] ?? $options[$short] ?? null;

        return is_string($value) && $value !== '' ? $value : null;
    };

    $directory = $readOption('directory', 'd');
    $schemaVersion = $readOption('version', 'v');
    $source = $readOption('source', 's');
    $remove = $readOption('remove', 'r');
    $structured_directory = $readOption('output', 'o');

    if ($structured_directory === null) {
        echo "Output directory is required. Please specify the --output option.\n";
        exit;
    }
    if (!file_exists($structured_directory)
        && !mkdir($structured_directory, 0777, true)
        && !is_dir($structured_directory)
    ) {
        echo "Unable to create output directory: " . $structured_directory . "\n";
        exit;
    }
    $this->structured_directory = $structured_directory;

    $deletionRequested = $remove !== null || $source !== null;
    if ($deletionRequested && ($remove === null || $source === null)) {
        // Without this guard a lone --remove/--source fell through to a
        // consistency check with no attachment directory.
        echo "Both --remove and --source are required to delete entries.\n";
        exit;
    }
    if (!$deletionRequested && ($directory === null || $schemaVersion === null)) {
        echo "Missing Attachment directory or schema version.\n";
        exit;
    }
    // getopt() returns strings, hence the strict comparison against strings.
    if ($schemaVersion !== null && !in_array($schemaVersion, ['1', '2'], true)) {
        echo "Invalid schema version. Only versions 1 and 2 are supported.\n";
        exit;
    }

    self::$schemaVersion = (int) $schemaVersion;
    $this->directory = $directory;
    $this->retrived_csv = './result_from_physical_files.csv';

    if ($deletionRequested) {
        $this->processAttachmentDeletionCSV($remove, $source);
    } else {
        $this->checkConsistency('attachment_file_info');
        if (is_dir($this->structured_directory)) {
            $this->removeDirectory($this->structured_directory);
        }
    }
    exit;
}
2023-05-19 11:58:21 +02:00
2023-05-19 19:01:57 +02:00
/**
 * Displays the help message with instructions on how to use the script.
 *
 * The option list mirrors what runFromCommandLine() parses: --output is
 * always required, and deletion needs both --remove and --source.
 *
 * @return void
 */
private function displayHelpMessage(): void
{
    $helpMessage = <<<EOT
Usage:
  php script.php [options]
Options:
  -h, --help        Display this help screen.
  -v, --version     Set the schema version (1 or 2).
  -d, --directory   Set the directory path for attachments.
  -o, --output      Set the folder for temp files (required).
  -r, --remove      CSV report listing the entries to remove.
  -s, --source      What to remove: "file" or "cassandra".
Example:
  php script_name --version schema_version --directory=/path/to/directory --output ./out/
For Delete:
  php script_name --output ./out/ --remove result_from_cassandra_entries.csv --source file - to remove missing physical files
  php script_name --output ./out/ --remove result_from_physical_files.csv --source cassandra - to remove missing cassandra entries
EOT;
    echo $helpMessage . "\n";
}
2023-05-19 19:01:57 +02:00
/**
 * Recursively removes a directory and its contents.
 *
 * Symbolic links are unlinked rather than followed, so nothing outside the
 * given tree is deleted. Does nothing when $directory is not a directory or
 * cannot be listed.
 *
 * @param string $directory The directory path to be removed.
 * @return void
 */
private function removeDirectory(string $directory): void
{
    if (!is_dir($directory)) {
        return;
    }

    $entries = scandir($directory);
    if ($entries === false) {
        return;
    }

    foreach (array_diff($entries, ['.', '..']) as $entry) {
        $path = $directory . '/' . $entry;
        // is_dir() follows symlinks; a link to a directory must be unlinked,
        // not descended into.
        if (is_dir($path) && !is_link($path)) {
            $this->removeDirectory($path);
        } else {
            unlink($path);
        }
    }

    rmdir($directory);
}
/**
 * Checks the consistency between database entries and file entries.
 *
 * Connects to Cassandra, dumps the database rows and the physical files into
 * per-bucket CSV files inside the output directory, then compares those CSV
 * files and writes the reports.
 *
 * @param string $tableName The name of the table in the database to check consistency for.
 * @return void
 */
public function checkConsistency(string $tableName): void
{
    $this->init();

    // Both calls are made for their side effect of writing the per-bucket CSV
    // files; their return values are not needed here.
    $this->getDbEntries($tableName);
    $this->getFileEntries($this->directory);

    $this->process_files_in_directory($this->structured_directory);
}
2023-05-19 19:01:57 +02:00
/**
 * Retrieves file entries from a directory and groups them by a two-character prefix.
 *
 * File names are split into "<client>-<id>" or, failing that,
 * "<client>.<rest>"; names matching neither form and thumbnail files
 * ("-thumb1"/"-thumb2") are skipped. The group key is the first two
 * characters of the part after the first delimiter. One CSV file per group is
 * written through createPhysicalFileCSV().
 *
 * Note: the 'creation_time' value is derived from the file's modification time.
 *
 * @param string $directory The directory path to retrieve file entries from.
 * @return array Entries grouped by prefix; each entry has the keys
 *               client_id, file_path, file_name, size and creation_time.
 */
private function getFileEntries(string $directory): array
{
    $paths = glob($directory . '/*');
    if ($paths === false) {
        // glob() reports errors with false; treat that as "no files".
        $paths = [];
    }

    $entries = [];
    foreach ($paths as $path) {
        if (!is_file($path)) {
            continue;
        }

        $fileName = basename($path);
        if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) {
            continue;
        }

        $dashParts = explode(self::DELIMITER, $fileName, 2);
        if (count($dashParts) === 2) {
            list($clientId, $id) = $dashParts;
        } else {
            $dotParts = explode('.', $fileName, 2);
            if (count($dotParts) < 2) {
                continue;
            }
            list($clientId, $id) = $dotParts;
        }

        $groupKey = (string) substr($id, 0, 2);
        $entries[$groupKey][] = [
            'client_id' => $clientId,
            'file_path' => $path,
            'file_name' => $fileName,
            'size' => filesize($path),
            'creation_time' => date('Y-m-d H:i:s', filemtime($path)),
        ];
    }

    // Create one CSV file per group of physical file entries.
    foreach ($entries as $groupKey => $groupEntries) {
        $this->createPhysicalFileCSV((string) $groupKey, $groupEntries);
    }

    return $entries;
}
2023-05-19 19:01:57 +02:00
/**
 * Creates a CSV file containing the physical file entries of one group.
 *
 * The file is written to "<output directory>/physical_<key>.csv" with the
 * columns id, size and creation_time; the id column holds the file name.
 *
 * @param string $clientId The group key used in the CSV file name.
 * @param array  $entries  The physical file entries of that group.
 * @return void
 * @throws \RuntimeException When the CSV file cannot be opened for writing.
 */
private function createPhysicalFileCSV(string $clientId, array $entries): void
{
    // Add the directory separator only when the configured output directory
    // does not already end with one; otherwise the file would be created
    // next to the directory instead of inside it.
    $baseDir = (string) $this->structured_directory;
    if ($baseDir !== '' && substr($baseDir, -1) !== '/' && substr($baseDir, -1) !== '\\') {
        $baseDir .= '/';
    }
    $fileName = $baseDir . 'physical_' . $clientId . self::CSV_EXTENSION;

    $csvFile = fopen($fileName, 'w');
    if ($csvFile === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $fileName));
    }

    try {
        fputcsv($csvFile, ['id', 'size', 'creation_time']);
        foreach ($entries as $entry) {
            fputcsv($csvFile, [
                $entry['file_name'],
                $entry['size'],
                $entry['creation_time'],
            ]);
        }
    } finally {
        fclose($csvFile);
    }
}
2023-05-19 19:01:57 +02:00
/**
 * Creates a CSV file containing the database entries of one group.
 *
 * The file is written to "<output directory>/cassandra_<key>.csv" with the
 * columns id, size, creation_time, filename, bucket, client_id and
 * attachment_id.
 *
 * @param string $clientId The group key used in the CSV file name.
 * @param array  $entries  The database entries of that group.
 * @return void
 * @throws \RuntimeException When the CSV file cannot be opened for writing.
 */
private function createDBFileCSV(string $clientId, array $entries): void
{
    // Add the directory separator only when the configured output directory
    // does not already end with one; otherwise the file would be created
    // next to the directory instead of inside it.
    $baseDir = (string) $this->structured_directory;
    if ($baseDir !== '' && substr($baseDir, -1) !== '/' && substr($baseDir, -1) !== '\\') {
        $baseDir .= '/';
    }
    $fileName = $baseDir . 'cassandra_' . $clientId . self::CSV_EXTENSION;

    $csvFile = fopen($fileName, 'w');
    if ($csvFile === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $fileName));
    }

    $headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id'];
    try {
        fputcsv($csvFile, $headers);
        foreach ($entries as $entry) {
            $rowData = [];
            // The header names double as the entry keys, in column order.
            foreach ($headers as $column) {
                $rowData[] = $entry[$column];
            }
            fputcsv($csvFile, $rowData);
        }
    } finally {
        fclose($csvFile);
    }
}
2023-05-19 19:01:57 +02:00
/**
 * Retrieves entries from a database table and writes them to per-group CSV files.
 *
 * Rows are fetched page by page. Each row is grouped under a two-character key:
 *  - purely numeric ids (presumably legacy attachments - TODO confirm) are
 *    keyed by their filename and grouped by the first two characters after
 *    the first dot of that filename;
 *  - all other ids are keyed by "<client_id>-<id>" and grouped by the first
 *    two characters of the id.
 * The bucket column is only selected (and therefore only available) for
 * schema versions other than 1.
 *
 * @param string $tableName The name of the database table.
 * @return array Entries grouped by two-character key, then by entry key.
 */
private function getDbEntries(string $tableName): array
{
    $query = sprintf(
        'SELECT client_id, id, size, filename, created_on%s FROM %s',
        $this->schema_version() === 1 ? '' : ', bucket',
        $tableName
    );
    $result = $this->cassandra->execute(
        $query,
        [
            'arguments' => [],
            'page_size' => self::DEFAULT_PAGE_SIZE,
        ]
    );

    $entries = [];
    while ($result) {
        foreach ($result as $row) {
            $rowId = (string) $row['id'];

            if (preg_match('/^[0-9]+$/', $rowId) === 1) {
                $nameParts = explode('.', (string) $row['filename'], 2);
                // A filename without a dot has no second part to take the prefix from.
                $groupKey = (string) substr($nameParts[1] ?? '', 0, 2);
                $entryId = $rowId;
                $entryKey = $row['filename'];
            } else {
                $groupKey = (string) substr($rowId, 0, 2);
                $entryId = $row['client_id'] . self::DELIMITER . $rowId;
                $entryKey = $entryId;
            }

            // NOTE(review): assumes created_on casts cleanly to a Unix
            // timestamp in seconds - confirm against the column type.
            $creationTime = date('Y-m-d H:i:s', (int) $row['created_on']);

            $entries[$groupKey][$entryKey] = [
                'id' => $entryId,
                'size' => (string) $row['size'],
                'creation_time' => $creationTime,
                'filename' => $row['filename'],
                // Schema version 1 does not select the bucket column.
                'bucket' => empty($row['bucket']) ? '' : $row['bucket'],
                'client_id' => (string) $row['client_id'],
                'attachment_id' => $rowId,
            ];
        }
        $result = $result->nextPage();
    }

    // Create one CSV file per group of database entries.
    foreach ($entries as $groupKey => $groupEntries) {
        $this->createDBFileCSV((string) $groupKey, $groupEntries);
    }

    return $entries;
}
2023-05-19 19:01:57 +02:00
/**
 * Compares one physical file entry with the Cassandra entry of the same id.
 *
 * Surrounding double quotes are stripped from both values of each side
 * before they are compared.
 *
 * @param string $id The ID of the file entry.
 * @param array $data The two values of the physical entry (indexes 0 and 1).
 * @param array $cassandra_file_assoc Cassandra entries indexed by id.
 * @return array|null The mismatch description ('id', 'file1' and, when the
 *                    id exists on both sides, 'file2'), or null on a match.
 */
private function compareFileEntries(string $id, array $data, array $cassandra_file_assoc)
{
    $unquote = function ($value) {
        return trim($value, '"');
    };

    $physical = [$unquote($data[0]), $unquote($data[1])];

    // No Cassandra counterpart at all: report the physical side only.
    if (!isset($cassandra_file_assoc[$id])) {
        return [
            'id' => $id,
            'file1' => [$id, $physical[0], $physical[1]],
        ];
    }

    $cassandra = [
        $unquote($cassandra_file_assoc[$id][0]),
        $unquote($cassandra_file_assoc[$id][1]),
    ];

    if ($physical === $cassandra) {
        return null;
    }

    return [
        'id' => $id,
        'file1' => [$id, $physical[0], $physical[1]],
        'file2' => [$id, $cassandra[0], $cassandra[1]],
    ];
}
2023-05-19 19:01:57 +02:00
/**
 * Retrieves the lines of a file and returns them as an array.
 *
 * The content is split on "\n"; a trailing newline therefore yields a final
 * empty string.
 *
 * @param string $file The path to the file.
 * @return array An array containing the lines of the file.
 * @throws \RuntimeException When the file cannot be read.
 */
private function getFileLines(string $file): array
{
    $file_contents = file_get_contents($file);
    if ($file_contents === false) {
        throw new \RuntimeException(sprintf('Unable to read file "%s".', $file));
    }

    return explode("\n", $file_contents);
}
2023-05-19 19:01:57 +02:00
/**
 * Drops falsy entries and reduces each remaining entry to its element at index 0.
 *
 * @param array $entries An array containing entries to be filtered and mapped.
 * @return array A re-indexed list holding the first element of every kept entry.
 */
private function filterAndMapEntries(array $entries): array
{
    $firstElements = [];

    foreach ($entries as $entry) {
        if (!$entry) {
            continue;
        }
        $firstElements[] = $entry[0];
    }

    return $firstElements;
}
2023-05-19 19:01:57 +02:00
/**
 * Compares the per-group CSV files in a directory and generates the CSV and HTML reports.
 *
 * Files named "physical_<key>.csv" and "cassandra_<key>.csv" are paired by
 * key. For a complete pair both directions are compared with
 * compare_csv_files() and every reported entry is kept. When one side of a
 * pair is missing, every row of the existing file is reported.
 *
 * Physical files without a matching Cassandra entry go to the
 * CASSANDRA_RESULT_CSV / CASSANDRA_HTML_REPORT reports; Cassandra entries
 * without a matching physical file go to the PHYSICAL_RESULT_CSV /
 * PHYSICAL_HTML_REPORT reports.
 *
 * @param string $dir The directory path containing the files to be processed.
 * @return void
 */
private function process_files_in_directory(string $dir): void
{
    $csv_paths = glob($dir . '/*.csv');
    if ($csv_paths === false) {
        $csv_paths = [];
    }

    $physical_files = [];
    $cassandra_files = [];
    foreach ($csv_paths as $file) {
        $name_parts = explode('_', basename($file, self::CSV_EXTENSION), 2);
        if (count($name_parts) !== 2) {
            continue;
        }
        list($csv_type, $file_num) = $name_parts;
        if ($csv_type === 'physical') {
            $physical_files[$file_num] = $file;
        } elseif ($csv_type === 'cassandra') {
            $cassandra_files[$file_num] = $file;
        }
    }
    ksort($physical_files);
    ksort($cassandra_files);

    // Physical files lacking a (matching) Cassandra entry.
    $cassandra_entries = [];
    // Cassandra entries lacking a (matching) physical file.
    $physical_entries = [];

    foreach ($physical_files as $file_num => $physical_file) {
        if (isset($cassandra_files[$file_num])) {
            $cassandra_file = $cassandra_files[$file_num];
            foreach ($this->compare_csv_files($physical_file, $cassandra_file) as $entry) {
                $cassandra_entries[] = $entry;
            }
            foreach ($this->compare_csv_files($cassandra_file, $physical_file) as $entry) {
                $physical_entries[] = $entry;
            }
            continue;
        }

        // No Cassandra CSV for this group: every physical file is unmatched.
        foreach ($this->readGroupCsvRows($physical_file) as $row) {
            $cassandra_entries[] = [
                'id' => $row['id'],
                'file1' => [$row['id'], $row['size'] ?? '', $row['creation_time'] ?? ''],
            ];
        }
    }

    foreach ($cassandra_files as $file_num => $cassandra_file) {
        if (isset($physical_files[$file_num])) {
            continue;
        }

        // No physical CSV for this group: every Cassandra entry is unmatched.
        // Column order matches what compare_csv_files() produces for 'file1'.
        foreach ($this->readGroupCsvRows($cassandra_file) as $row) {
            $physical_entries[] = [
                'id' => $row['id'],
                'file1' => [
                    $row['id'],
                    $row['size'] ?? '',
                    $row['creation_time'] ?? '',
                    $row['filename'] ?? '',
                    $row['client_id'] ?? '',
                    $row['bucket'] ?? '',
                    $row['attachment_id'] ?? '',
                ],
            ];
        }
    }

    $result_from_cassandra_entries = array_unique($cassandra_entries, SORT_REGULAR);
    $result_from_physical_files = array_unique($physical_entries, SORT_REGULAR);

    $this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV);
    $this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT);
    $this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV);
    $this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT);
}

/**
 * Reads a per-group CSV file into a list of rows keyed by the header names.
 *
 * Blank lines and rows without an id are skipped; short rows are padded
 * with empty strings.
 *
 * @param string $path The CSV file to read.
 * @return array A list of associative rows.
 * @throws \RuntimeException When the file cannot be opened.
 */
private function readGroupCsvRows(string $path): array
{
    $handle = fopen($path, 'r');
    if ($handle === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for reading.', $path));
    }

    $rows = [];
    try {
        $headers = fgetcsv($handle);
        if (!is_array($headers) || $headers === [null]) {
            return [];
        }
        $column_count = count($headers);

        while (($fields = fgetcsv($handle)) !== false) {
            if ($fields === [null]) {
                continue;
            }
            $fields = array_slice(array_pad($fields, $column_count, ''), 0, $column_count);
            $row = array_combine($headers, $fields);
            if (!isset($row['id']) || $row['id'] === '') {
                continue;
            }
            $rows[] = $row;
        }
    } finally {
        fclose($handle);
    }

    return $rows;
}
2023-05-29 08:23:39 +02:00
2023-05-19 19:01:57 +02:00
/**
 * Builds an associative array from the lines of a CSV file.
 *
 * Every line identical to the first (header) line is skipped. Lines are
 * parsed as CSV, so quoted fields lose their quotes and may contain commas.
 *  - Lines with exactly NUM_FIELDS fields map the first field to the
 *    remaining two.
 *  - Longer lines map the first field to the following six fields; fields
 *    that are absent are filled with null.
 * Other lines are ignored.
 *
 * @param array $file_lines An array containing lines of a file.
 * @return array An associative array keyed by the first field of each line.
 */
private function buildFileAssociation(array $file_lines): array
{
    $file_assoc = [];
    $header_line = reset($file_lines);

    foreach ($file_lines as $line) {
        if ($line === $header_line) {
            continue;
        }

        $values = str_getcsv((string) $line);
        $field_count = count($values);

        if ($field_count === self::NUM_FIELDS) {
            $file_assoc[$values[0]] = [$values[1], $values[2]];
        } elseif ($field_count > self::NUM_FIELDS) {
            // Pad instead of reading offsets that a 4-6 field row does not have.
            $file_assoc[$values[0]] = array_pad(array_slice($values, 1, 6), 6, null);
        }
    }

    return $file_assoc;
}
2023-05-19 19:01:57 +02:00
/**
 * Compares two CSV files and returns the rows of the first file that are
 * missing from, or differ from, the second file.
 *
 * Rows are matched on the "id" column; two matched rows differ when their
 * id or size values are not identical. Columns are located by header name,
 * and a column that a file does not have yields null.
 *
 * Each returned element has the keys:
 *  - 'id':    the row id;
 *  - 'file1': [id, size, creation_time, filename, client_id, bucket, attachment_id]
 *             taken from the first file;
 *  - 'file2': [id, size, creation_time, filename] from the second file
 *             (only present for differing rows).
 *
 * @param string $file1_path The file path of the first CSV file.
 * @param string $file2_path The file path of the second CSV file.
 * @return array An array containing missing entries or entries with mismatched data.
 * @throws \RuntimeException When either file cannot be read.
 */
private function compare_csv_files(string $file1_path, string $file2_path): array
{
    $read_rows = function (string $path): array {
        $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            throw new \RuntimeException(sprintf('Unable to read CSV file "%s".', $path));
        }

        return array_map('str_getcsv', $lines);
    };

    // Looks a column up by header name; null when the header or cell is absent.
    // (Indexing a row with a failed array_search() result would silently
    // read column 0 instead.)
    $cell = function (array $row, array $column_index, string $column) {
        if (!isset($column_index[$column])) {
            return null;
        }

        return $row[$column_index[$column]] ?? null;
    };

    $file1_rows = $read_rows($file1_path);
    $file2_rows = $read_rows($file2_path);
    $file1_columns = array_flip(array_shift($file1_rows) ?: []);
    $file2_columns = array_flip(array_shift($file2_rows) ?: []);

    $file1_assoc = [];
    foreach ($file1_rows as $row) {
        $id = $cell($row, $file1_columns, 'id');
        if ($id === null) {
            continue;
        }
        $file1_assoc[$id] = [
            $id,
            $cell($row, $file1_columns, 'size'),
            $cell($row, $file1_columns, 'creation_time'),
            $cell($row, $file1_columns, 'filename'),
            $cell($row, $file1_columns, 'client_id'),
            $cell($row, $file1_columns, 'bucket'),
            $cell($row, $file1_columns, 'attachment_id'),
        ];
    }

    $file2_assoc = [];
    foreach ($file2_rows as $row) {
        $id = $cell($row, $file2_columns, 'id');
        if ($id === null) {
            continue;
        }
        $file2_assoc[$id] = [
            $id,
            $cell($row, $file2_columns, 'size'),
            $cell($row, $file2_columns, 'creation_time'),
            $cell($row, $file2_columns, 'filename'),
        ];
    }

    $missing_entries = [];
    foreach ($file1_assoc as $id => $file1_values) {
        if (!isset($file2_assoc[$id])) {
            $missing_entries[] = [
                'id' => $id,
                'file1' => $file1_values,
            ];
            continue;
        }

        $file2_values = $file2_assoc[$id];
        if ($file1_values[0] !== $file2_values[0] || $file1_values[1] !== $file2_values[1]) {
            $missing_entries[] = [
                'id' => $id,
                'file1' => $file1_values,
                'file2' => $file2_values,
            ];
        }
    }

    return $missing_entries;
}
/**
 * Generates a CSV report of inconsistent files.
 *
 * Each row's 'file1' list is read as
 * [id, size, creation_time, filename, client_id, bucket, attachment_id].
 * For numeric ids the filename (index 3) is reported as path and name
 * instead of the id. The report file is made world read/writable (0666).
 *
 * @param array  $inconsistentFiles An array of inconsistent files.
 * @param string $name              The filename to use for the report.
 * @return void
 * @throws \RuntimeException When the report file cannot be opened for writing.
 */
private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void
{
    $fp = fopen($name, 'w');
    if ($fp === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $name));
    }

    try {
        fputcsv($fp, [
            self::CSV_COLUMN_FILE_ATTACHMENT,
            self::CSV_COLUMN_FILE_PATH,
            self::CSV_COLUMN_FILE_NAME,
            self::CSV_COLUMN_THUMB1,
            self::CSV_COLUMN_THUMB2,
            self::CSV_COLUMN_SIZE,
            self::CSV_COLUMN_CREATION_TIME,
            self::CSV_COLUMN_CLIENT_ID,
            self::CSV_COLUMN_BUCKET,
            self::CSV_COLUMN_ID,
        ]);

        foreach ($inconsistentFiles as $row) {
            $file1 = $row['file1'] ?? [];
            $check_value = $row['id'];
            // 'file2' only exists for mismatched (not missing) entries.
            $filePath = $check_value ? $check_value : ($row['file2'][0] ?? '');
            if (is_numeric($row['id'])) {
                $filePath = $file1[3] ?? '';
                $check_value = $filePath;
            }

            $size = isset($file1[1]) ? (string) $file1[1] : filesize($filePath);
            $creationTime = isset($file1[2])
                ? (string) $file1[2]
                : date('Y-m-d H:i:s', filectime($filePath));

            fputcsv($fp, [
                'Attachment',
                $filePath,
                $check_value,
                $row['thumb1'] ?? '',
                $row['thumb2'] ?? '',
                $size,
                $creationTime,
                $file1[4] ?? '',
                $file1[5] ?? '',
                // Guard the index that is actually read (6, not 5).
                $file1[6] ?? '',
            ]);
        }
    } finally {
        fclose($fp);
    }

    chmod($name, 0666);
}
2023-05-19 19:01:57 +02:00
/**
 * Generates a CSV report for physical files that have no matching database entry.
 *
 * The entry name is taken from 'file1'[0], falling back to 'file2'[0] when
 * that is empty; the entry path is that name inside the attachment
 * directory. Thumbnail names are "<name>-thumb1"/"-thumb2", except for ids
 * containing a dot, which get no thumbnails. The report file is made world
 * read/writable (0666).
 *
 * @param array  $inconsistentFiles An array containing inconsistent file data.
 * @param string $name              The name of the CSV report file to be generated.
 * @return void
 * @throws \RuntimeException When the report file cannot be opened for writing.
 */
private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void
{
    $fp = fopen($name, 'w');
    if ($fp === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $name));
    }

    try {
        fputcsv($fp, [
            self::CSV_COLUMN_FILE_ATTACHMENT,
            'Entry Path',
            'Entry Name',
            self::CSV_COLUMN_THUMB1,
            self::CSV_COLUMN_THUMB2,
            self::CSV_COLUMN_SIZE,
            self::CSV_COLUMN_CREATION_TIME,
        ]);

        foreach ($inconsistentFiles as $row) {
            // Decide on the name first, then build the path. Testing the
            // concatenated path is always truthy, so the fallback never ran.
            $entryName = !empty($row['file1'][0]) ? $row['file1'][0] : ($row['file2'][0] ?? '');
            $filePath = $this->directory . '/' . $entryName;

            $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath);
            $creationTime = isset($row['file1'][2])
                ? (string) $row['file1'][2]
                : date('Y-m-d H:i:s', filectime($filePath));

            $thumb1 = $entryName . '-thumb1';
            $thumb2 = $entryName . '-thumb2';
            if (is_string($row['id']) && strpos($row['id'], '.') !== false) {
                $thumb1 = '';
                $thumb2 = '';
            }

            fputcsv($fp, [
                'File',
                $filePath,
                $entryName,
                $thumb1,
                $thumb2,
                $size,
                $creationTime,
            ]);
        }
    } finally {
        fclose($fp);
    }

    chmod($name, 0666);
}
/**
 * Generates an HTML report of inconsistent files.
 *
 * The template "report_template.html" is read relative to the current working
 * directory and its "{{table_rows}}" placeholder is replaced with one table
 * row per inconsistent entry. All cell values are HTML-escaped.
 *
 * @param array  $inconsistentFiles An array of inconsistent files.
 * @param string $name              The filename to use for the report.
 * @return void
 * @throws Exception When the template cannot be read or the report cannot be opened.
 */
private function generateHtmlReport(array $inconsistentFiles, string $name): void
{
    $templateContent = file_get_contents('report_template.html');
    if ($templateContent === false) {
        throw new Exception('Failed to read the HTML template file.');
    }

    $tableRows = '';
    foreach ($inconsistentFiles as $row) {
        $file1 = $row['file1'];
        $baseName = $file1[0];
        $basePath = $this->directory . '/' . $baseName;

        // Size and time fall back to the file on disk only when the entry
        // does not carry them.
        if (isset($file1[1])) {
            $size = (string) $file1[1];
        } else {
            $size = filesize($basePath);
        }
        if (isset($file1[2])) {
            $creationTime = str_replace('"', '', $file1[2]);
        } else {
            $creationTime = date('Y-m-d H:i:s', filectime($basePath));
        }

        // Entries whose filename starts with "<digits>." or whose id contains
        // a dot are shown under that name and without thumbnails.
        $displayName = $baseName;
        $withThumbs = true;
        if (isset($file1[3]) && preg_match('/^[0-9]+\./', $file1[3])) {
            $displayName = $file1[3];
            $withThumbs = false;
        }
        if (is_string($row['id']) && strpos($row['id'], '.') !== false) {
            $displayName = $row['id'];
            $withThumbs = false;
        }

        $cells = [
            'Attachment',
            htmlspecialchars($this->directory . '/' . $displayName),
            htmlspecialchars($displayName),
            htmlspecialchars($withThumbs ? $baseName . '-thumb1' : ''),
            htmlspecialchars($withThumbs ? $baseName . '-thumb2' : ''),
            htmlspecialchars($size),
            htmlspecialchars($creationTime),
        ];
        $tableRows .= '<tr><td>' . implode('</td><td>', $cells) . '</td></tr>';
    }

    $html = str_replace('{{table_rows}}', $tableRows, $templateContent);

    $file = fopen($name, 'w');
    if (!$file) {
        throw new Exception('Failed to open the file for writing.');
    }
    fwrite($file, $html);
    fclose($file);
}
2023-05-19 11:58:21 +02:00
/**
 * Returns the Cassandra schema version currently configured for the run.
 *
 * @return int
 */
private function schema_version(): int
{
    $version = static::$schemaVersion;

    return $version;
}
2023-05-29 08:23:39 +02:00
2023-05-19 11:58:21 +02:00
/**
 * Returns info (id, size) for an attachment file.
 *
 * For schema version 1 the row is looked up by id and client id; for other
 * versions the bucket is part of the lookup as well. All values are bound as
 * statement arguments.
 *
 * @param int         $clientId
 * @param string|null $bucketId Only used when the schema version is not 1.
 * @param string      $id
 * @return object|null The row as an object (id cast to string, size cast to
 *                     int when present), or null when no row matches.
 */
private function get_info(int $clientId, ?string $bucketId, string $id): ?object
{
    $cql = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?';
    $args = [
        'client_id' => $clientId,
        'id' => $id,
    ];

    if ($this->schema_version() !== 1) {
        // Bind the bucket instead of splicing it into the CQL text.
        $cql .= ' AND bucket = ?';
        $args['bucket'] = (string) $bucketId;
    }

    $query = $this->cassandra->prepare($cql);
    $res = $this->cassandra->execute(
        $query,
        [
            'arguments' => $args,
        ]
    );

    if (!$res || !$res->valid()) {
        return null;
    }

    $tmp = $res->current();
    $attachment = (object) $tmp;
    $attachment->id = (string) $tmp['id'];
    if (array_key_exists('size', $tmp)) {
        $attachment->size = (int) $tmp['size'];
    }

    return $attachment;
}
/**
 * Derives the 'bucket' partitioning value from an id.
 *
 * The bucket is the leading part of the id; its length is the static
 * $bucketMagic setting.
 *
 * @param string $id
 * @return string
 */
private function _set_bucket(string $id): string
{
    $bucketLength = static::$bucketMagic;

    return substr($id, 0, $bucketLength);
}
2023-05-19 19:01:57 +02:00
2023-05-19 11:58:21 +02:00
/**
 * Updates the attachment count and total size of the current client.
 *
 * Issues two UPDATE statements against attachment_stats: one changing the
 * count by 1 and one changing the size by $size.
 *
 * @param bool $add  True to add to the statistics, false to subtract.
 * @param int  $size The size to add or subtract.
 *
 * @return void
 */
private function _update_attachment_stats(bool $add, int $size): void
{
    // The arithmetic operator is unrelated to the filename DELIMITER
    // constant, even though both happen to be '-'.
    $op = $add ? '+' : '-';

    // Bind the client id the same way for both statements.
    $options = ['arguments' => ['client_id' => (int) static::$clientId]];

    $query = $this->cassandra->prepare(
        'UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?'
    );
    $this->cassandra->execute($query, $options);

    $query = $this->cassandra->prepare(
        'UPDATE attachment_stats SET size = size ' . $op . ' ' . $size . ' where client_id = ?'
    );
    $this->cassandra->execute($query, $options);
}
/**
 * Fetches the attachment_ids row for an attachment id.
 *
 * The row is looked up by id, the bucket derived from that id, and the
 * current client id; callers read the full attachments key from it.
 *
 * @param string $id
 *
 * @return object|null the row cast to an object, or null when nothing matches
 */
private function _get_attachment_key(string $id): ?object
{
    $statement = $this->cassandra->prepare('SELECT * from attachment_ids where id = ? AND bucket = ? AND client_id = ?');
    $rows = $this->cassandra->execute($statement, [
        'arguments' => [
            'client_id' => static::$clientId,
            'bucket' => $this->_set_bucket($id),
            'id' => $id,
        ],
    ]);

    if (!$rows || !$rows->valid()) {
        return null;
    }

    return (object) $rows->current();
}
/**
 * Returns the data of a single attachment for the provided id.
 *
 * The id is first resolved to its key via _get_attachment_key(); that key is
 * then used to read the row from `attachments`. For schema versions other
 * than 1 the lookup also includes entity_id.
 *
 * @param string $attachmentId
 *
 * @return object|null the row passed through _convert_to_object(), or null
 *                     when the key or the row is missing or the driver
 *                     rejected the arguments
 */
private function _get_attachment_by_id(string $attachmentId): ?object
{
    $rows = null;
    try {
        $key = $this->_get_attachment_key($attachmentId);
        if ($key) {
            $cql = 'SELECT * FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?';
            $arguments = [
                'client_id' => static::$clientId,
                'id' => $key->id,
                'project_id' => $key->project_id,
                'entity_type' => $key->entity_type,
            ];
            if ($this->schema_version() !== 1) {
                $cql .= ' AND entity_id = ?';
                $arguments['entity_id'] = $key->entity_id;
            }
            $rows = $this->cassandra->execute(
                $this->cassandra->prepare($cql),
                ['arguments' => $arguments]
            );
        }
    } catch (Cassandra\Exception\InvalidArgumentException $e) {
        // Still best-effort (the caller gets null), but leave a trace instead
        // of dropping the error silently.
        error_log('Attachment lookup failed for id ' . $attachmentId . ': ' . $e->getMessage());
    }

    if ($rows === null || !$rows->valid()) {
        return null;
    }

    return $this->_convert_to_object($rows->current());
}
/**
 * Removes the attachment_ids entry for an attachment id.
 *
 * The delete is sent with executeAsync() and its result is not inspected.
 *
 * @param string $id
 *
 * @return void
 */
private function _delete_attachment_key(string $id): void
{
    $statement = $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?');
    $keyColumns = [
        'client_id' => static::$clientId,
        'bucket' => $this->_set_bucket($id),
        'id' => $id,
    ];

    $this->cassandra->executeAsync($statement, ['arguments' => $keyColumns]);
}
/**
 * Updates the references to an attachment data file.
 *
 * Always changes ref_count in attachment_file_refs by one. When an
 * attachment id is supplied, the matching attachment_file_ids row is also
 * inserted ($add = true) or deleted ($add = false). All statements are sent
 * with executeAsync() and their results are not inspected.
 *
 * @param string $data_id      id of the data file
 * @param bool   $add          true to add a reference, false to remove one
 * @param string $attachmentId attachment owning the reference; '' to only
 *                             touch the counter
 *
 * @return void
 */
private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void
{
    $queryArguments = [
        'client_id' => static::$clientId,
        'bucket' => $this->_set_bucket($data_id),
        'id' => $data_id,
    ];

    // The arithmetic sign is spelled out here; self::DELIMITER is an unrelated
    // constant whose value only happens to be '-'.
    $op = $add ? '+' : '-';
    $query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . $op . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?');
    $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]);

    // Compare against '' explicitly: empty() would also skip the id '0'.
    if ($attachmentId === '') {
        return;
    }

    $queryArguments['attachment_id'] = $attachmentId;
    if ($add) {
        $query = $this->cassandra->prepare('INSERT INTO attachment_file_ids (client_id,bucket,id,attachment_id) VALUES(?,?,?,?)');
    } else {
        $query = $this->cassandra->prepare('DELETE FROM attachment_file_ids WHERE client_id = ? AND bucket = ? AND attachment_id = ? AND id = ?');
    }
    $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]);
}
/**
 * Deletes an attachment file and everything that references it from Cassandra.
 *
 * Steps, in order:
 *  1. read the file info; stop with false when there is none,
 *  2. subtract the file from attachment_stats,
 *  3. delete the attachment_file_info and attachment_file_refs rows,
 *  4. for every attachment_id listed in attachment_file_ids for this file,
 *     delete its attachment_ids key and its attachments row,
 *  5. delete the attachment_file_ids rows,
 *  6. if an attachment exists under $id itself, delete it and update its
 *     file references.
 *
 * As a side effect the static $clientId property is set to $clientId; the
 * private helpers read it.
 *
 * @param int         $clientId
 * @param string|null $bucketId bucket of the attachment_file_info row (only
 *                              used with schema version 2)
 * @param string      $id
 *
 * @return bool true only when step 6 found an attachment and its delete was
 *              executed; false otherwise (including when no file info exists)
 */
public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool
{
    static::$clientId = $clientId;

    $fileInfo = $this->get_info($clientId, $bucketId, $id);
    if (!$fileInfo) {
        return false;
    }
    // get_info() only sets `size` when the column is present in the row.
    $this->_update_attachment_stats(false, (int) ($fileInfo->size ?? 0));

    $isSchemaV1 = $this->schema_version() === 1;

    // attachment_file_info: bind the bucket instead of concatenating it into
    // the statement text (the value comes from a CSV file).
    $infoArgs = [
        'client_id' => $clientId,
        'id' => $id,
    ];
    $q = 'DELETE FROM attachment_file_info WHERE id = ? AND client_id = ?';
    if ($this->schema_version() === 2) {
        $q .= ' AND bucket = ?';
        $infoArgs['bucket'] = (string) $bucketId;
    }
    $this->cassandra->execute($this->cassandra->prepare($q), ['arguments' => $infoArgs]);

    // The remaining file tables are keyed by the bucket derived from the id.
    $refData = [
        'client_id' => $clientId,
        'id' => $id,
        'bucket' => $this->_set_bucket($id),
    ];
    $query = $this->cassandra->prepare(
        'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?'
    );
    $this->cassandra->execute($query, ['arguments' => $refData]);

    // get all attachments with deleted file and remove them
    $query = $this->cassandra->prepare(
        'SELECT attachment_id FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
    );
    $referencing = $this->cassandra->execute($query, ['arguments' => $refData]);

    if ($isSchemaV1) {
        $delQuery = $this->cassandra->prepare(
            'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?'
        );
    } else {
        $delQuery = $this->cassandra->prepare(
            'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? AND entity_id = ?'
        );
    }

    while ($referencing && $referencing->valid()) {
        $attachmentId = $referencing->current()['attachment_id'];
        $referencing->next();

        $key = $this->_get_attachment_key((string) $attachmentId);
        if ($key === null) {
            continue;
        }

        $delArgs = [
            'client_id' => static::$clientId,
            'project_id' => $key->project_id,
            'entity_type' => $key->entity_type,
            'id' => $attachmentId,
        ];
        if (!$isSchemaV1) {
            // Only the non-v1 statement has an entity_id marker.
            $delArgs['entity_id'] = $key->entity_id;
        }

        $this->_delete_attachment_key((string) $attachmentId);
        $this->cassandra->execute($delQuery, ['arguments' => $delArgs]);
    }

    $query = $this->cassandra->prepare(
        'DELETE FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?'
    );
    $this->cassandra->execute($query, ['arguments' => $refData]);

    $attachment = $this->_get_attachment_by_id($id);
    if (!$attachment) {
        return false;
    }

    $arguments = [
        'arguments' => [
            'client_id' => static::$clientId,
            'id' => $id,
            'project_id' => $attachment->project_id,
            'entity_type' => $attachment->entity_type,
        ],
    ];
    if ($isSchemaV1) {
        $query = $this->cassandra->prepare(" DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_type = ? AND client_id = ? ");
    } else {
        $query = $this->cassandra->prepare(" DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_id = ? AND entity_type = ? AND client_id = ? ");
        $arguments['arguments']['entity_id'] = $attachment->entity_id;
    }

    if ($this->cassandra->execute($query, $arguments) == null) {
        return false;
    }

    $this->_update_file_refs($attachment->data_id, false, $attachment->id);
    $this->_delete_attachment_key($id);

    return true;
}
/**
 * Deletes the attachments / physical files listed in a CSV report.
 *
 * Asks for confirmation on STDIN first; anything other than "yes" aborts.
 * Every processed deletion is appended to deleted_files.log in the current
 * working directory.
 *
 * Rows need at least 7 columns: source, file path, file name, thumb 1,
 * thumb 2, size, creation time. Rows for Cassandra attachments additionally
 * need client id, bucket and id (10 columns). The first line of the file, any
 * later line identical to it, and any row whose first column is the
 * 'File/Attachment' header label are skipped.
 *
 * @param string $file path of the CSV report
 * @param string $src  which rows to act on: 'cassandra' processes rows whose
 *                     source column is 'Attachment', 'file' processes rows
 *                     whose source column is 'File'
 *
 * @return void
 *
 * example file:
 *
 * File/Attachment,File path,File name,Thumb 1,Thumb 2,Size,Creation Time,ClientId,Bucket,Id
 * Attachment,/test1/testx,testx,testx-thumb1,testx-thumb2,4343,20-02-22 13:30,1,a,abc-def
 * File,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,,
 * Attachment,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def
 */
public function processAttachmentDeletionCSV(string $file, string $src): void
{
    echo " Before proceeding with the deletion, make sure you have a backup of your data. " . PHP_EOL;
    echo " You can revert back to the backup in case of accidental data loss. " . PHP_EOL;
    echo " Do you want to delete the physical files listed in the CSV report? (yes/no): ";
    $confirmation = trim((string) fgets(STDIN));
    if (strtolower($confirmation) !== 'yes') {
        return;
    }

    $file_contents = file_get_contents($file);
    if ($file_contents === false) {
        echo "Unable to read CSV file: $file" . PHP_EOL;
        return;
    }

    // Normalise CRLF / CR so a trailing "\r" never ends up in the last column.
    $file_lines = explode("\n", str_replace(["\r\n", "\r"], "\n", $file_contents));
    // The first line is treated as the header; lines identical to it are skipped.
    $headerLine = $file_lines[0];

    $logFile = 'deleted_files.log';
    $logHandle = fopen($logFile, 'a');
    if ($logHandle === false) {
        echo "Unable to open log file: $logFile" . PHP_EOL;
        return;
    }

    foreach ($file_lines as $line) {
        if ($line === $headerLine) {
            continue;
        }

        $values = explode(',', $line);
        if ($values[0] === self::CSV_COLUMN_FILE_ATTACHMENT) {
            continue;
        }
        if (count($values) < 7) {
            continue;
        }

        $data = (object) [];
        $data->source = $values[0];
        $data->path = $values[1];
        $data->name = $values[2];
        $data->thumb1 = $values[3];
        $data->thumb2 = $values[4];
        $data->size = $values[5];
        $data->created = $values[6];
        $path = dirname($data->path);

        if ($data->source === 'Attachment' && $src === 'cassandra') {
            // Attachment rows are only usable with the client id / bucket / id columns.
            if (count($values) < 10) {
                echo "Skipping incomplete attachment row: $line" . PHP_EOL;
                continue;
            }
            $data->clientId = (int) $values[7];
            $data->bucket = $values[8];
            $data->id = $values[9];

            echo " will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL;
            $this->deleteAttachment($data->clientId, $data->bucket, $data->id);
            fwrite($logHandle, " Deleted attachment: $data->id " . PHP_EOL);
        } elseif ($data->source === 'File' && $src === 'file') {
            $filePath = $values[1];
            $thumb1Path = $path . '/' . $values[3];
            $thumb2Path = $path . '/' . $values[4];

            // is_file() rather than file_exists(): an empty thumbnail column
            // makes the path point at the directory itself, which must never
            // be handed to a delete call.
            $thumbsDeleted = false;
            if (is_file($thumb1Path) && is_file($thumb2Path)) {
                files::delete($thumb1Path);
                files::delete($thumb2Path);
                fwrite($logHandle, " Deleted thumbnail: " . $thumb1Path . PHP_EOL);
                fwrite($logHandle, " Deleted thumbnail: " . $thumb2Path . PHP_EOL);
                $thumbsDeleted = true;
            }

            if (is_file($filePath)) {
                $fileDeleted = unlink($filePath);

                // Remove a thumbnail that is still present (only one of the two
                // existed); thumbnails deleted above are not unlinked twice.
                if (!$thumbsDeleted) {
                    foreach ([$thumb1Path, $thumb2Path] as $thumbPath) {
                        if (is_file($thumbPath) && unlink($thumbPath)) {
                            fwrite($logHandle, " Deleted thumbnail: " . $thumbPath . PHP_EOL);
                        }
                    }
                }

                if ($fileDeleted) {
                    echo " File deleted: $filePath " . PHP_EOL;
                    // Write the deleted file path to the log file
                    fwrite($logHandle, " Deleted file: $filePath " . PHP_EOL);
                } else {
                    echo "Could not delete file: $filePath" . PHP_EOL;
                }
            } else {
                echo " File not found: $filePath " . PHP_EOL;
            }
        }
    }

    fclose($logHandle);
    echo " Deletion completed. The list of attachments is saved in ' $logFile '. " . PHP_EOL;
}
}
// Script entry point: build the checker without a directory argument. The
// constructor itself calls runFromCommandLine() with $_SERVER['argv'].
$checker = new DataConsistencyChecker ();
2023-05-19 19:01:57 +02:00
// Start the consistency check, passing the attachment_file_info name.
// NOTE(review): the meaning of the second argument (true) is defined by
// checkConsistency(), which is not visible here - confirm.
$checker -> checkConsistency ( " attachment_file_info " , true );