diff --git a/download_sharepoint.py b/download_sharepoint.py
index 47e59ed..98dc7cb 100644
--- a/download_sharepoint.py
+++ b/download_sharepoint.py
@@ -211,7 +211,7 @@ def get_fresh_download_url(app, drive_id, item_id):
         return None, "Item returned but '@microsoft.graph.downloadUrl' was missing after 3 attempts."
 
 
-def download_single_file(app, drive_id, item_id, local_path, expected_size, display_name, remote_hash=None, initial_url=None):
+def download_single_file(app, drive_id, item_id, local_path, expected_size, display_name, remote_hash=None, initial_url=None, remote_mtime_str=None):
     try:
         file_mode = 'wb'
         resume_header = {}
@@ -220,17 +220,33 @@ def download_single_file(app, drive_id, item_id, local_path, expected_size, disp
 
         if os.path.exists(local_path):
             existing_size = os.path.getsize(local_path)
+            local_mtime = os.path.getmtime(local_path)
+
+            # Konvertér SharePoint ISO8601 UTC tid (f.eks. 2024-03-29T12:00:00Z) til unix timestamp
+            # Vi fjerner 'Z' og bruger datetime.fromisoformat
+            remote_mtime = datetime.fromisoformat(remote_mtime_str.replace('Z', '+00:00')).timestamp()
+
+            # Hvis filen findes, har rigtig størrelse OG lokal er ikke ældre end remote -> SKIP
             if existing_size == expected_size:
-                if verify_integrity(local_path, remote_hash):
-                    logger.info(f"Skipped (complete): {display_name}")
-                    return True, None
+                if local_mtime >= (remote_mtime - 1):  # Vi tillader 1 sekunds difference pga. filsystem-præcision
+                    if verify_integrity(local_path, remote_hash):
+                        logger.info(f"Skipped (up-to-date): {display_name}")
+                        return True, None
+                    else:
+                        logger.warning(f"Hash mismatch on existing file: {display_name}. Re-downloading.")
+                        existing_size = 0
                 else:
-                    logger.warning(f"Hash mismatch on existing file: {display_name}. Re-downloading.")
+                    logger.info(f"Update available: {display_name} (Remote is newer)")
                     existing_size = 0
             elif existing_size < expected_size:
-                logger.info(f"Resuming: {display_name} from {format_size(existing_size)}")
-                resume_header = {'Range': f'bytes={existing_size}-'}
-                file_mode = 'ab'
+                # Ved resume tjekker vi også om kilden er ændret siden vi startede
+                if local_mtime < (remote_mtime - 1):
+                    logger.warning(f"Remote file changed during partial download: {display_name}. Restarting.")
+                    existing_size = 0
+                else:
+                    logger.info(f"Resuming: {display_name} from {format_size(existing_size)}")
+                    resume_header = {'Range': f'bytes={existing_size}-'}
+                    file_mode = 'ab'
             else:
                 logger.warning(f"Local file larger than remote: {display_name}. Overwriting.")
                 existing_size = 0
@@ -306,10 +322,11 @@ def process_item_list(app, drive_id, item_path, local_root_path, report, executo
             item_id = item['id']
             download_url = item.get('@microsoft.graph.downloadUrl')
             remote_hash = item.get('file', {}).get('hashes', {}).get('quickXorHash')
+            remote_mtime = item.get('lastModifiedDateTime')
 
             future = executor.submit(
                 download_single_file,
                 app, drive_id, item_id, local_path, item['size'], display_path,
-                remote_hash, download_url
+                remote_hash, download_url, remote_mtime
             )
             futures[future] = display_path