Fix QuickXorHash-implementering og tilføj manglende længde-XOR

- Opdaterer quickxorhash til at bruge en 160-bit heltalsbuffer for korrekt cirkulær rotation
- Tilføjer det obligatoriske XOR-trin med filens længde, som manglede tidligere
- Sikrer korrekt 20-byte little-endian format ved base64-encoding
- Dette løser problemet med konstante hash-mismatch på ellers korrekte filer
This commit is contained in:
Martin Tranberg
2026-03-29 14:52:13 +02:00
parent 634b5ff151
commit 39a3aff495

View File

@@ -80,24 +80,33 @@ def safe_get(url, headers, stream=False, timeout=60, params=None):
# --- Punkt 4: Integrity Validation (QuickXorHash) --- # --- Punkt 4: Integrity Validation (QuickXorHash) ---
def quickxorhash(file_path):
    """Compute the Microsoft QuickXorHash of a file.

    The hash is a 160-bit state into which every input byte is XORed at a
    bit offset that advances 11 bits per byte and wraps around circularly.
    After all bytes are consumed, the total byte count (as an unsigned
    64-bit little-endian integer) is XORed into the LAST 8 bytes of the
    20-byte digest, matching Microsoft's C# reference and rclone's port.

    Args:
        file_path: Path of the file to hash; it is opened in binary mode.

    Returns:
        The 20-byte digest, base64-encoded as an ASCII string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    width = 160          # size of the hash state in bits
    step = 11            # bit offset advance per input byte
    state_mask = (1 << width) - 1

    state = 0            # 160-bit state; bit 0 is the LSB of digest byte 0
    length = 0           # total number of bytes hashed
    shift = 0            # equals (byte_index * step) % width
    with open(file_path, 'rb') as f:
        while True:
            chunk = f.read(CHUNK_SIZE)
            if not chunk:
                break
            for b in chunk:
                # Circular placement: bits pushed past bit 159 wrap to bit 0.
                shifted = b << shift
                state ^= (shifted & state_mask) | (shifted >> width)
                shift = (shift + step) % width
            length += len(chunk)

    # Finalize: XOR the 64-bit length into digest bytes 12..19, i.e. bits
    # 96..159 of the little-endian state. XORing it into the low bits
    # (bytes 0..7) yields a digest that never matches the server's value.
    state ^= (length & 0xFFFFFFFFFFFFFFFF) << (width - 64)

    digest = state.to_bytes(width // 8, byteorder='little')
    return base64.b64encode(digest).decode('ascii')
def verify_integrity(local_path, remote_hash): def verify_integrity(local_path, remote_hash):
"""Verifies file integrity using Microsoft QuickXorHash.""" """Verifies file integrity using Microsoft QuickXorHash."""