diff --git a/download_sharepoint.py b/download_sharepoint.py index a00f475..2888956 100644 --- a/download_sharepoint.py +++ b/download_sharepoint.py @@ -30,10 +30,11 @@ logger = logging.getLogger(__name__) report_lock = threading.Lock() def format_size(size_bytes): - for unit in ['B', 'KB', 'MB', 'GB', 'TB']: + for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB']: if size_bytes < 1024.0: return f"{size_bytes:.2f} {unit}" size_bytes /= 1024.0 + return f"{size_bytes:.2f} EB" def load_config(file_path): config = {} @@ -68,18 +69,43 @@ def retry_request(func): raise e logger.error(f"Request failed: {e}. Retrying in {wait}s...") time.sleep(wait) - return None + raise requests.exceptions.RetryError(f"Max retries ({MAX_RETRIES}) exceeded.") return wrapper @retry_request def safe_get(url, headers, stream=False, timeout=60, params=None): return requests.get(url, headers=headers, stream=stream, timeout=timeout, params=params) -# --- Punkt 4: Integrity Validation (QuickXorHash - Placeholder for full logic) --- +# --- Punkt 4: Integrity Validation (QuickXorHash) --- +def quickxorhash(file_path): + """Compute Microsoft QuickXorHash for a file. Returns base64-encoded string. + Uses 3 × uint64 cells matching Microsoft's C# reference implementation.""" + SHIFT = 11 + WIDTH = 160 + data = [0, 0, 0] # 3 × 64-bit unsigned integers + i = 0 + with open(file_path, 'rb') as f: + while True: + chunk = f.read(CHUNK_SIZE) + if not chunk: + break + for byte in chunk: + bit_idx = (i * SHIFT) % WIDTH + cell = bit_idx // 64 + shift = bit_idx % 64 + data[cell] = (data[cell] ^ (byte << shift)) & 0xFFFFFFFFFFFFFFFF + i += 1 + result = struct.pack('