Compare commits
2 Commits
6bc4dd8f20
...
59eb9a4ab0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
59eb9a4ab0 | ||
|
|
1c3180e037 |
@@ -17,7 +17,8 @@ A production-ready Python utility for robust synchronization of SharePoint Onlin
|
||||
2. **Reliability:** Includes a custom `retry_request` decorator for Exponential Backoff, handling throttling (429) and transient network errors.
|
||||
3. **Concurrency:** Multi-threaded architecture (5 workers) for simultaneous scanning and downloading.
|
||||
4. **Pagination:** Full support for OData pagination, ensuring complete folder traversal regardless of item count.
|
||||
5. **Logging & Audit:** Integrated Python `logging` to `sharepoint_download.log` and structured CSV reports for error auditing.
|
||||
5. **Self-Healing Sessions:** Automatically detects and resolves 401 Unauthorized errors by refreshing both expiring Microsoft Graph Download URLs and MSAL Access Tokens mid-process.
|
||||
6. **Logging & Audit:** Integrated Python `logging` to `sharepoint_download.log` and structured CSV reports for error auditing.
|
||||
|
||||
## Building and Running
|
||||
|
||||
@@ -30,7 +31,8 @@ A production-ready Python utility for robust synchronization of SharePoint Onlin
|
||||
|
||||
## Development Conventions
|
||||
|
||||
* **Error Handling:** Always use the `safe_get` (retry-wrapped) method for Graph API calls.
|
||||
* **Error Handling:** Always use the `safe_get` (retry-wrapped) method for Graph API calls. For item-specific operations, use `get_fresh_download_url` to handle token/URL expiry.
|
||||
* **Authentication:** Use `get_headers(app, force_refresh=True)` when the Graph API returns a 401 error, to ensure session continuity.
|
||||
* **Thread Safety:** Use `report_lock` when updating the shared error list from worker threads.
|
||||
* **Logging:** Prefer `logger.info()` or `logger.error()` over `print()` to ensure persistence in `sharepoint_download.log`.
|
||||
* **Integrity:** Always verify file integrity using `size` and `quickXorHash` where available.
|
||||
|
||||
@@ -112,22 +112,37 @@ def get_drive_id(app, site_id, drive_name):
|
||||
|
||||
# --- Point 2: Resume / Chunked Download logic ---
|
||||
def get_fresh_download_url(app, drive_id, item_id):
|
||||
"""Fetches a fresh download URL for a specific item ID with token refresh support."""
|
||||
url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{item_id}?$select=id,@microsoft.graph.downloadUrl"
|
||||
"""Fetches a fresh download URL for a specific item ID with retries and robust error handling."""
|
||||
url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{item_id}"
|
||||
|
||||
try:
|
||||
headers = get_headers(app)
|
||||
response = requests.get(url, headers=headers, timeout=60)
|
||||
|
||||
if response.status_code == 401:
|
||||
logger.info("Access Token expired. Forcing refresh...")
|
||||
headers = get_headers(app, force_refresh=True)
|
||||
for attempt in range(3):
|
||||
try:
|
||||
headers = get_headers(app)
|
||||
response = requests.get(url, headers=headers, timeout=60)
|
||||
|
||||
response.raise_for_status()
|
||||
return response.json().get('@microsoft.graph.downloadUrl'), None
|
||||
except Exception as e:
|
||||
return None, str(e)
|
||||
if response.status_code == 401:
|
||||
logger.info(f"Access Token expired during refresh (Attempt {attempt+1}). Forcing refresh...")
|
||||
headers = get_headers(app, force_refresh=True)
|
||||
response = requests.get(url, headers=headers, timeout=60)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
download_url = data.get('@microsoft.graph.downloadUrl')
|
||||
|
||||
if download_url:
|
||||
return download_url, None
|
||||
|
||||
# If item exists but URL is missing, it might be a transient SharePoint issue
|
||||
logger.warning(f"Attempt {attempt+1}: '@microsoft.graph.downloadUrl' missing for {item_id}. Retrying in 1s...")
|
||||
time.sleep(1)
|
||||
|
||||
except Exception as e:
|
||||
if attempt == 2:
|
||||
return None, str(e)
|
||||
logger.warning(f"Attempt {attempt+1} failed: {e}. Retrying...")
|
||||
time.sleep(1)
|
||||
|
||||
return None, "Item returned but '@microsoft.graph.downloadUrl' was missing after 3 attempts."
|
||||
|
||||
def download_single_file(app, drive_id, item_id, local_path, expected_size, display_name, remote_hash=None, initial_url=None):
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user