Compare commits

..

2 Commits

2 changed files with 32 additions and 15 deletions

View File

@@ -17,7 +17,8 @@ A production-ready Python utility for robust synchronization of SharePoint Onlin
2. **Reliability:** Includes a custom `retry_request` decorator for Exponential Backoff, handling throttling (429) and transient network errors.
3. **Concurrency:** Multi-threaded architecture (5 workers) for simultaneous scanning and downloading.
4. **Pagination:** Full support for OData pagination, ensuring complete folder traversal regardless of item count.
5. **Self-Healing Sessions:** Automatically detects and resolves 401 Unauthorized errors by refreshing both expiring Microsoft Graph Download URLs and MSAL Access Tokens mid-process.
6. **Logging & Audit:** Integrated Python `logging` to `sharepoint_download.log` and structured CSV reports for error auditing.
## Building and Running
@@ -30,7 +31,8 @@ A production-ready Python utility for robust synchronization of SharePoint Onlin
## Development Conventions
* **Error Handling:** Always use the `safe_get` (retry-wrapped) method for Graph API calls. For item-specific operations, use `get_fresh_download_url` to handle token/URL expiry.
* **Authentication:** Use `get_headers(app, force_refresh=True)` when a 401 error is encountered from Graph API to ensure session continuity.
* **Thread Safety:** Use `report_lock` when updating the shared error list from worker threads.
* **Logging:** Prefer `logger.info()` or `logger.error()` over `print()` to ensure persistence in `sharepoint_download.log`.
* **Integrity:** Always verify file integrity using `size` and `quickXorHash` where available.

View File

@@ -112,22 +112,37 @@ def get_drive_id(app, site_id, drive_name):
# --- Part 2: Resume / Chunked Download logic ---
def get_fresh_download_url(app, drive_id, item_id):
    """Fetch a fresh, short-lived download URL for a single drive item.

    Microsoft Graph pre-authenticated download URLs expire after a short
    time, so long-running downloads must re-request them. This helper
    retries up to 3 times, transparently refreshing the MSAL access token
    on a 401 response.

    Args:
        app: MSAL application object, passed through to `get_headers`.
        drive_id: Identifier of the SharePoint/OneDrive drive.
        item_id: Identifier of the drive item to download.

    Returns:
        A `(download_url, error)` tuple: `(url, None)` on success, or
        `(None, message)` after all attempts are exhausted.
    """
    # $select keeps the response minimal: only the id and the
    # pre-authenticated download URL are needed by callers.
    url = (
        f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{item_id}"
        "?$select=id,@microsoft.graph.downloadUrl"
    )
    for attempt in range(3):
        try:
            headers = get_headers(app)
            response = requests.get(url, headers=headers, timeout=60)
            if response.status_code == 401:
                # Access token expired mid-run: force a token refresh and
                # replay the request once before evaluating the status.
                logger.info(f"Access Token expired during refresh (Attempt {attempt+1}). Forcing refresh...")
                headers = get_headers(app, force_refresh=True)
                response = requests.get(url, headers=headers, timeout=60)
            response.raise_for_status()
            data = response.json()
            download_url = data.get('@microsoft.graph.downloadUrl')
            if download_url:
                return download_url, None
            # If item exists but URL is missing, it might be a transient SharePoint issue
            logger.warning(f"Attempt {attempt+1}: '@microsoft.graph.downloadUrl' missing for {item_id}. Retrying in 1s...")
            time.sleep(1)
        except Exception as e:
            # On the final attempt propagate the error to the caller
            # instead of raising, matching the (url, error) contract.
            if attempt == 2:
                return None, str(e)
            logger.warning(f"Attempt {attempt+1} failed: {e}. Retrying...")
            time.sleep(1)
    return None, "Item returned but '@microsoft.graph.downloadUrl' was missing after 3 attempts."
def download_single_file(app, drive_id, item_id, local_path, expected_size, display_name, remote_hash=None, initial_url=None): def download_single_file(app, drive_id, item_id, local_path, expected_size, display_name, remote_hash=None, initial_url=None):
try: try: