diff --git a/README.md b/README.md
index c426ce3..57ce45d 100644
--- a/README.md
+++ b/README.md
@@ -15,8 +15,8 @@ This script makes it possible to download specific folders from a SharePoint do
 * **Smart Threshold:** Define an MB limit (default 30 MB) below which files are always hashed, while larger files (e.g. 65 GB) are compared by size only to save time (configurable).
 * **Robust Library Lookup:** Automatically finds your document library, with a built-in fallback (e.g. from "Delte dokumenter" to "Documents").
 * **Resume Download:** Supports HTTP `Range` headers for resuming large files.
-* **Auto-Refresh of Downloads & Tokens:** Automatically renews sessions and links mid-process.
-* **Exponential Backoff:** Handles Microsoft Graph throttling (`429 Too Many Requests`) intelligently.
+* **Auto-Refresh of Downloads & Tokens:** Automatically renews sessions and links mid-process without unnecessary waiting (Optimized 401 handling).
+* **Intelligent Error Handling:** Includes retry logic with exponential backoff and specialized handling of expired tokens (safe_graph_get).
 
 ## Installation
 
diff --git a/download_sharepoint.py b/download_sharepoint.py
index 7e1cb56..b3470f7 100644
--- a/download_sharepoint.py
+++ b/download_sharepoint.py
@@ -79,6 +79,10 @@ def retry_request(func):
             response.raise_for_status()
             return response
         except requests.exceptions.RequestException as e:
+            # On a 401 we should not wait/retry here, since the token/URL has most likely expired
+            if isinstance(e, requests.exceptions.HTTPError) and e.response is not None and e.response.status_code == 401:
+                raise e
+
             retries += 1
             wait = 2 ** retries
             if retries >= MAX_RETRIES:
@@ -92,6 +96,16 @@
 def safe_get(url, headers, stream=False, timeout=60, params=None):
     return requests.get(url, headers=headers, stream=stream, timeout=timeout, params=params)
 
+def safe_graph_get(app, url):
+    """Specialized helper for Graph API calls that handles 401 by refreshing tokens."""
+    try:
+        return safe_get(url, headers=get_headers(app))
+    except requests.exceptions.HTTPError as e:
+        if e.response is not None and e.response.status_code == 401:
+            logger.info("Access Token expired during Graph call. Forcing refresh...")
+            return safe_get(url, headers=get_headers(app, force_refresh=True))
+        raise
+
 # --- Point 4: Integrity Validation (QuickXorHash) ---
 def quickxorhash(file_path):
     """Compute Microsoft QuickXorHash for a file. Returns base64-encoded string.
@@ -162,12 +176,12 @@ def get_headers(app, force_refresh=False):
 def get_site_id(app, site_url):
     parsed = urlparse(site_url)
     url = f"https://graph.microsoft.com/v1.0/sites/{parsed.netloc}:{parsed.path}"
-    response = safe_get(url, headers=get_headers(app))
+    response = safe_graph_get(app, url)
     return response.json()['id']
 
 def get_drive_id(app, site_id, drive_name):
     url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
-    response = safe_get(url, headers=get_headers(app))
+    response = safe_graph_get(app, url)
     drives = response.json().get('value', [])
 
     # Try exact match
@@ -315,7 +329,6 @@ def process_item_list(app, drive_id, item_path, local_root_path, report, executo
         logger.warning(f"Max folder depth ({MAX_FOLDER_DEPTH}) reached at: {item_path}. Skipping subtree.")
         return
     try:
-        auth_headers = get_headers(app)
         encoded_path = quote(item_path)
 
         if not item_path:
@@ -324,7 +337,7 @@ def process_item_list(app, drive_id, item_path, local_root_path, report, executo
         url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{encoded_path}:/children"
 
         while url:
-            response = safe_get(url, headers=auth_headers)
+            response = safe_graph_get(app, url)
             data = response.json()
             items = data.get('value', [])
 
@@ -350,8 +363,6 @@ def process_item_list(app, drive_id, item_path, local_root_path, report, executo
                 futures[future] = display_path
 
             url = data.get('@odata.nextLink')
-            if url:
-                auth_headers = get_headers(app)
 
     except Exception as e:
         logger.error(f"Error traversing {item_path}: {e}")