diff --git a/elodie.py b/elodie.py index 3540fdb..928c19a 100755 --- a/elodie.py +++ b/elodie.py @@ -3,6 +3,7 @@ import os import re import sys +import logging from datetime import datetime import click @@ -22,19 +23,24 @@ from elodie.config import load_config from elodie.filesystem import FileSystem from elodie.localstorage import Db from elodie.media.media import Media, get_all_subclasses -from elodie.media.media import Media from elodie.media.audio import Audio from elodie.media.photo import Photo from elodie.media.video import Video from elodie.plugins.plugins import Plugins from elodie.result import Result +from elodie.summary import Summary from elodie.external.pyexiftool import ExifTool from elodie.dependencies import get_exiftool from elodie import constants FILESYSTEM = FileSystem() -def import_file(_file, destination, db, album_from_folder, action, trash, allow_duplicates): + +def print_help(command): + click.echo(command.get_help(click.Context(sort))) + + +def import_file(_file, destination, db, album_from_folder, mode, trash, allow_duplicates): """Set file metadata and move it to destination. """ @@ -57,7 +63,7 @@ def import_file(_file, destination, db, album_from_folder, action, trash, allow_ return dest_path = FILESYSTEM.process_file(_file, destination, db, - media, album_from_folder, action, allowDuplicate=allow_duplicates) + media, album_from_folder, mode, allowDuplicate=allow_duplicates) if dest_path: log.all('%s -> %s' % (_file, dest_path)) if trash: @@ -65,6 +71,7 @@ def import_file(_file, destination, db, album_from_folder, action, trash, allow_ return dest_path or None + @click.command('batch') @click.option('--debug', default=False, is_flag=True, help='Override the value in constants.py with True.') @@ -101,9 +108,9 @@ def _import(destination, source, file, album_from_folder, trash, """Import files or directories by reading their EXIF and organizing them accordingly. """ if dry_run: - action = 'dry_run' + mode = 'dry_run' else: - action = 'copy' + mode = 'copy' constants.debug = debug has_errors = False @@ -142,7 +149,7 @@ def _import(destination, source, file, album_from_folder, trash, for current_file in files: dest_path = import_file(current_file, destination, db, - album_from_folder, action, trash, allow_duplicates) + album_from_folder, mode, trash, allow_duplicates) result.append((current_file, dest_path)) has_errors = has_errors is True or not dest_path else: @@ -154,6 +161,97 @@ def _import(destination, source, file, album_from_folder, trash, if has_errors: sys.exit(1) + +# TODO +# recursive : bool +# True if you want src_dir to be searched recursively for files (False to search only in top-level of src_dir) + + +@click.command('sort') +@click.option('--debug', default=False, is_flag=True, + help='Override the value in constants.py with True.') +@click.option('--dry-run', default=False, is_flag=True, + help='Dry run only, no change made to the filesystem.') +@click.option('--destination', '-d', type=click.Path(file_okay=False), + default=None, help='Sort files into this directory.') +@click.option('--copy', '-c', default=False, is_flag=True, + help='True if you want files to be copied over from src_dir to\ + dest_dir rather than moved') +@click.option('--day-begins', '-b', default=0, + help='What hour of the day you want the day to begin (only for\ + classification purposes). Defaults at 0 as midnight. Can be\ + used to group early morning photos with the previous day. Must\ + be a number between 0-23') +@click.option('--exclude-regex', '-e', default=set(), multiple=True, + help='Regular expression for directories or files to exclude.') +@click.option('--filter-by-ext', '-f', default=False, help='''Use filename + extension to filter files for sorting. If used without argument, use + common media file extension for filtering. Ignored files remain in + the same directory structure''' ) +@click.option('--ignore-tags', '-i', default=set(), multiple=True, + help='Specific tags or group that will be ignored when\ + searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' ) +@click.option('--remove-duplicates', '-r', default=False, is_flag=True, + help='True to remove files that are exactly the same in name\ + and a file hash') +@click.option('--verbose', '-v', default=False, is_flag=True, + help='True if you want to see details of file processing') +@click.argument('paths', required=True, nargs=-1, type=click.Path()) +def _sort(debug, dry_run, destination, copy, day_begins, exclude_regex, filter_by_ext, ignore_tags, + remove_duplicates, verbose, paths): + """Sort files or directories by reading their EXIF and organizing them + according to config.ini preferences. + """ + + if copy: + mode = 'copy' + else: + mode = 'move' + + if debug: + constants.debug = logging.DEBUG + elif verbose: + constants.debug = logging.INFO + else: + constants.debug = logging.ERROR + + logger = logging.getLogger('elodie') + logger.setLevel(constants.debug) + + if not destination and paths: + destination = paths[-1] + paths = paths[0:-1] + else: + sys.exit(1) + + paths = set(paths) + destination = _decode(destination) + destination = os.path.abspath(os.path.expanduser(destination)) + + if not os.path.exists(destination): + logger.error(f'Directory {destination} does not exist') + + # if no exclude list was passed in we check if there's a config + if len(exclude_regex) == 0: + config = load_config(constants.CONFIG_FILE) + if 'Exclusions' in config: + exclude_regex = [value for key, value in config.items('Exclusions')] + + exclude_regex_list = set(exclude_regex) + + # Initialize Db + db = Db(destination) + filesystem = FileSystem(mode, dry_run, exclude_regex_list, logger) + + summary, has_errors = filesystem.sort_files(paths, destination, db, remove_duplicates) + + if verbose or debug: + summary.write() + + if has_errors: + sys.exit(1) + + @click.command('generate-db') @click.option('--path', type=click.Path(file_okay=False), required=True, help='Path of your photo library.') @@ -183,6 +281,7 @@ def _generate_db(path, debug): log.progress('', True) result.write() + @click.command('verify') @click.option('--path', type=click.Path(file_okay=False), required=True, help='Path of your photo library.') @@ -298,7 +397,7 @@ def _update(album, location, time, title, paths, debug): db = Db(destination) media = Media.get_class_by_file(current_file, get_all_subclasses()) - if not media: + if media is None: continue updated = False @@ -345,7 +444,7 @@ def _update(album, location, time, title, paths, debug): original_base_name.replace('-%s' % original_title, '')) dest_path = FILESYSTEM.process_file(current_file, destination, db, - updated_media, False, action='move', allowDuplicate=True) + updated_media, False, mode='move', allowDuplicate=True) log.info(u'%s -> %s' % (current_file, dest_path)) log.all('{"source":"%s", "destination":"%s"}' % (current_file, dest_path)) @@ -373,6 +472,7 @@ def main(): main.add_command(_import) +main.add_command(_sort) main.add_command(_update) main.add_command(_generate_db) main.add_command(_verify) diff --git a/elodie/filesystem.py b/elodie/filesystem.py index 58711db..4ae8665 100644 --- a/elodie/filesystem.py +++ b/elodie/filesystem.py @@ -5,6 +5,9 @@ General file system methods. """ from builtins import object +import filecmp +import hashlib +import logging import os import re import shutil @@ -17,13 +20,16 @@ from elodie.config import load_config from elodie import constants from elodie.localstorage import Db -from elodie.media import media +from elodie.media.media import get_media_class from elodie.plugins.plugins import Plugins +from elodie.summary import Summary + class FileSystem(object): """A class for interacting with the file system.""" - def __init__(self): + def __init__(self, mode='copy', dry_run=False, exclude_regex_list=set(), + logger=logging.getLogger()): # The default folder path is along the lines of 2017-06-17_01-04-14-dsc_1234-some-title.jpg self.default_file_name_definition = { 'date': '%Y-%m-%d_%H-%M-%S', @@ -45,9 +51,16 @@ class FileSystem(object): # https://travis-ci.org/jmathai/elodie/builds/483012902 self.whitespace_regex = '[ \t\n\r\f\v]+' + self.dry_run = dry_run + self.exclude_regex_list = exclude_regex_list + self.mode = mode + self.logger = logger + self.summary = Summary() + # Instantiate a plugins object self.plugins = Plugins() + def create_directory(self, directory_path): """Create a directory if it does not already exist. @@ -59,7 +72,9 @@ class FileSystem(object): if os.path.exists(directory_path): return True else: - os.makedirs(directory_path) + if not self.dry_run: + os.makedirs(directory_path) + self.logger.info(f'Create {directory_path}') return True except OSError: # OSError is thrown for cases like no permission @@ -592,6 +607,7 @@ class FileSystem(object): return folder_name + def process_checksum(self, _file, db, allow_duplicate): checksum = db.checksum(_file) if(checksum is None): @@ -618,8 +634,184 @@ class FileSystem(object): )) return checksum + + def checksum(self, file_path, blocksize=65536): + """Create a hash value for the given file. + + See http://stackoverflow.com/a/3431835/1318758. + + :param str file_path: Path to the file to create a hash for. + :param int blocksize: Read blocks of this size from the file when + creating the hash. + :returns: str or None + """ + hasher = hashlib.sha256() + with open(file_path, 'rb') as f: + buf = f.read(blocksize) + + while len(buf) > 0: + hasher.update(buf) + buf = f.read(blocksize) + return hasher.hexdigest() + return None + + + def checkcomp(self, src_path, dest_path): + """Check file. + """ + src_checksum = self.checksum(src_path) + + if self.dry_run: + return src_checksum + + dest_checksum = self.checksum(dest_path) + + if dest_checksum != src_checksum: + self.logger.info(f'Source checksum and destination checksum are not the same') + return False + + return src_checksum + + + def sort_file(self, src_path, dest_path, remove_duplicates=True): + '''Copy or move file to dest_path.''' + + mode = self.mode + dry_run = self.dry_run + + # check for collisions + if(src_path == dest_path): + self.logger.info(f'File {dest_path} already sorted') + return True + if os.path.isfile(dest_path): + self.logger.info(f'File {dest_path} already exist') + if remove_duplicates: + if filecmp.cmp(src_path, dest_path): + self.logger.info(f'File in source and destination are identical. Duplicate will be ignored.') + if(mode == 'move'): + if not dry_run: + shutil.remove(src_path) + self.logger.info(f'remove: {src_path}') + return True + else: # name is same, but file is different + self.logger.info(f'File in source and destination are different.') + return False + else: + return False + else: + if(mode == 'move'): + if not dry_run: + # Move the processed file into the destination directory + shutil.move(src_path, dest_path) + self.logger.info(f'move: {src_path} -> {dest_path}') + elif mode == 'copy': + if not dry_run: + shutil.copy2(src_path, dest_path) + self.logger.info(f'copy: {src_path} -> {dest_path}') + return True + + return False + + + def check_file(self, src_path, dest_path, db): + + # Check if file remain the same + checksum = self.checkcomp(src_path, dest_path) + has_errors = False + if checksum: + if not self.dry_run: + db.add_hash(checksum, dest_path) + db.update_hash_db() + + if dest_path: + self.logger.info(f'{src_path} -> {dest_path}') + + self.summary.append((src_path, dest_path)) + + else: + self.logger.error(f'Files {src_path} and {dest_path} are not identical') + # sys.exit(1) + self.summary.append((src_path, False)) + has_errors = True + + return self.summary, has_errors + + + def get_all_files_in_path(self, path, exclude_regex_list=set()): + files = set() + # some error checking + if not os.path.exists(path): + self.logger.error(f'Directory {path} does not exist') + + path = os.path.expanduser(path) + if os.path.isdir(path): + files.update(self.get_all_files(path, False, exclude_regex_list)) + else: + if not self.should_exclude(path, self.exclude_regex_list, True): + files.add(path) + return files + + + def sort_files(self, paths, destination, db, remove_duplicates=False): + + has_errors = False + for path in paths: + files = self.get_all_files_in_path(path, self.exclude_regex_list) + num_files = len(files) + + conflict_file_list = set() + for src_path in files: + # Process files + media = get_media_class(src_path) + if media: + metadata = media.get_metadata() + # Get the destination path according to metadata + directory_name = self.get_folder_path(metadata, db) + file_name = self.get_file_name(metadata) + else: + # Keep same directory structure + directory_name = os.path.dirname(os.path.relpath(src_path, + path)) + file_name = os.path.basename(src_path) + + dest_directory = os.path.join(destination, directory_name) + dest_path = os.path.join(dest_directory, file_name) + self.create_directory(dest_directory) + result = self.sort_file(src_path, dest_path, remove_duplicates) + if result: + self.summary, has_errors = self.check_file(src_path, dest_path, db) + else: + # There is conflict files + conflict_file_list.add((src_path, dest_path)) + + for src_path, dest_path in conflict_file_list: + # Try to sort the file + result = self.sort_file(src_path, dest_path, remove_duplicates) + if result: + conflict_file_list.remove((src_path, dest_path)) + else: + n = 1 + while not result: + # Add appendix to the name + pre, ext = os.path.splitext(dest_path) + dest_path = pre + '_' + str(n) + ext + result = self.sort_file(src_path, dest_path, remove_duplicates) + if n > 100: + self.logger.error(f'{self.mode}: to many append for {dest_path}...') + break + self.logger.info(f'Same name already exists...renaming to: {dest_path}') + + if result: + self.summary, has_errors = self.check_file(src_path, dest_path, db) + else: + self.summary.append((src_path, False)) + has_errors = True + + return self.summary, has_errors + + def process_file(self, _file, destination, db, media, album_from_folder, - action, **kwargs): + mode, **kwargs): allow_duplicate = False if('allowDuplicate' in kwargs): allow_duplicate = kwargs['allowDuplicate'] @@ -658,22 +850,15 @@ class FileSystem(object): self.create_directory(dest_directory) - # exiftool renames the original file by appending '_original' to the - # file name. A new file is written with new tags with the initial file - # name. See exiftool man page for more details. - - # Check if the source file was processed by exiftool and an _original - # file was created. - - if(action == 'move'): + if(mode == 'move'): stat = os.stat(_file) # Move the processed file into the destination directory shutil.move(_file, dest_path) - elif action == 'copy': + elif mode == 'copy': shutil.copy2(_file, dest_path) - if action != 'dry_run': + if mode != 'dry_run': # Set the utime based on what the original file contained # before we made any changes. # Then set the utime on the destination file based on metadata. diff --git a/elodie/media/media.py b/elodie/media/media.py index 330acfa..2227038 100644 --- a/elodie/media/media.py +++ b/elodie/media/media.py @@ -11,6 +11,7 @@ are used to represent the actual files. import mimetypes import os import six +import logging # load modules from elodie import log @@ -236,7 +237,11 @@ class Media(): if(extension in i.extensions): return i(_file) - return None + exclude_list = ['.DS_Store', '.directory'] + if os.path.basename(_file) == '.DS_Store': + return None + else: + return Media(_file) @classmethod @@ -600,3 +605,18 @@ def get_all_subclasses(cls=None): return subclasses + +def get_media_class(_file): + if not os.path.exists(_file): + logging.warning(f'Could not find {_file}') + logging.error(f'Could not find {_file}') + return False + + media = Media.get_class_by_file(_file, get_all_subclasses()) + if not media: + logging.warning(f'File{_file} is not supported') + logging.error(f'File {_file} can\'t be imported') + return False + + return media + diff --git a/tests/media/test_media.py b/tests/media/test_media.py index da615d6..fe72b6a 100644 --- a/tests/media/test_media.py +++ b/tests/media/test_media.py @@ -29,12 +29,16 @@ def test_get_all_subclasses(): expected = {Media, Photo, Video, Audio} assert subclasses == expected, subclasses + +# def test_get_media_class(_file): +# pass + def test_get_class_by_file_without_extension(): base_file = helper.get_file('withoutextension') cls = Media.get_class_by_file(base_file, [Audio, Photo, Video]) - assert cls is None, cls + assert cls is not None, cls def test_get_original_name(): temporary_folder, folder = helper.create_working_folder() @@ -156,12 +160,12 @@ def test_get_class_by_file_video(): def test_get_class_by_file_unsupported(): media = Media.get_class_by_file(helper.get_file('text.txt'), [Photo, Video]) - assert media is None + assert media is not None, media def test_get_class_by_file_ds_store(): media = Media.get_class_by_file(helper.get_file('.DS_Store'), [Photo, Video, Audio]) - assert media is None + assert media is None, media def test_get_class_by_file_invalid_type(): media = Media.get_class_by_file(None, diff --git a/tests/test_elodie.py b/tests/test_elodie.py index d03681a..e421cf2 100644 --- a/tests/test_elodie.py +++ b/tests/test_elodie.py @@ -364,6 +364,11 @@ def test_import_file_with_multiple_config_exclude(): assert 'Success 0' in result.output, result.output assert 'Error 0' in result.output, result.output + +def test_get_all_files_in_paths(): + pass + + def test_update_location_on_audio(): temporary_folder, folder = helper.create_working_folder() temporary_folder_destination, folder_destination = helper.create_working_folder() @@ -656,7 +661,6 @@ def test_cli_batch_plugin_googlephotos(): @unittest.skip('to fix') def test_cli_debug_import(): - import ipdb; ipdb.set_trace() runner = CliRunner() # import result = runner.invoke(elodie._import, ['--destination', '/does/not/exist', '/does/not/exist']) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 581406a..57cecfe 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -713,6 +713,62 @@ def test_parse_folder_name_multiple_keys_not_found(): assert path == 'United States of America', path +def test_checkcomp(): + filesystem = FileSystem() + temporary_folder, folder = helper.create_working_folder() + orig = helper.get_file('photo.png') + src_path1 = os.path.join(folder,'photo.png') + src_path2 = os.path.join(folder,'plain.jpg') + shutil.copyfile(helper.get_file('photo.png'), src_path1) + shutil.copyfile(helper.get_file('plain.jpg'), src_path2) + dest_path = os.path.join(folder,'photo_copy.jpg') + shutil.copyfile(src_path1, dest_path) + checksum1 = filesystem.checksum(src_path1) + checksum2 = filesystem.checksum(src_path2) + valid_checksum = filesystem.checkcomp(dest_path, checksum1) + invalid_checksum = filesystem.checkcomp(dest_path, checksum2) + assert valid_checksum + assert not invalid_checksum + +def test_sort_file(): + filesystem = FileSystem() + temporary_folder, folder = helper.create_working_folder() + src_path = os.path.join(folder,'photo.png') + shutil.copyfile(helper.get_file('photo.png'), src_path) + dest_path1 = os.path.join(folder,'photo_copy.jpg') + checksum1 = filesystem.checksum(src_path) + result_copy = filesystem.sort_file(src_path, dest_path1) + assert result_copy + assert filesystem.checkcomp(dest_path1, checksum1) + + dest_path2 = os.path.join(folder,'photo_move.jpg') + checksum2 = filesystem.checksum(src_path) + result_move = filesystem.sort_file(src_path, dest_path2) + assert result_move + assert filesystem.checkcomp(dest_path2, checksum2) + + +def test_sort_files(): + temporary_folder, folder = helper.create_working_folder() + temporary_folder_destination, folder_destination = helper.create_working_folder() + + db = Db(folder) + filesystem = FileSystem() + + filenames = ['photo.png', 'plain.jpg', 'text.txt', 'withoutextension'] + for src_file in filenames: + origin = os.path.join(folder, src_file) + shutil.copyfile(helper.get_file(src_file), origin) + + summary, has_errors = filesystem.sort_files([folder], folder_destination, db) + + shutil.rmtree(folder) + shutil.rmtree(folder_destination) + + assert summary, summary + assert not has_errors, has_errors + + def test_process_file_invalid(): filesystem = FileSystem() temporary_folder, folder = helper.create_working_folder()