Compare commits
2 Commits
6bc4dd8f20
...
59eb9a4ab0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
59eb9a4ab0 | ||
|
|
1c3180e037 |
@@ -17,7 +17,8 @@ A production-ready Python utility for robust synchronization of SharePoint Onlin
|
|||||||
2. **Reliability:** Includes a custom `retry_request` decorator for Exponential Backoff, handling throttling (429) and transient network errors.
|
2. **Reliability:** Includes a custom `retry_request` decorator for Exponential Backoff, handling throttling (429) and transient network errors.
|
||||||
3. **Concurrency:** Multi-threaded architecture (5 workers) for simultaneous scanning and downloading.
|
3. **Concurrency:** Multi-threaded architecture (5 workers) for simultaneous scanning and downloading.
|
||||||
4. **Pagination:** Full support for OData pagination, ensuring complete folder traversal regardless of item count.
|
4. **Pagination:** Full support for OData pagination, ensuring complete folder traversal regardless of item count.
|
||||||
5. **Logging & Audit:** Integrated Python `logging` to `sharepoint_download.log` and structured CSV reports for error auditing.
|
5. **Self-Healing Sessions:** Automatically detects and resolves 401 Unauthorized errors by refreshing both expiring Microsoft Graph Download URLs and MSAL Access Tokens mid-process.
|
||||||
|
6. **Logging & Audit:** Integrated Python `logging` to `sharepoint_download.log` and structured CSV reports for error auditing.
|
||||||
|
|
||||||
## Building and Running
|
## Building and Running
|
||||||
|
|
||||||
@@ -30,7 +31,8 @@ A production-ready Python utility for robust synchronization of SharePoint Onlin
|
|||||||
|
|
||||||
## Development Conventions
|
## Development Conventions
|
||||||
|
|
||||||
* **Error Handling:** Always use the `safe_get` (retry-wrapped) method for Graph API calls.
|
* **Error Handling:** Always use the `safe_get` (retry-wrapped) method for Graph API calls. For item-specific operations, use `get_fresh_download_url` to handle token/URL expiry.
|
||||||
|
* **Authentication:** Use `get_headers(app, force_refresh=True)` when a 401 error is encountered from Graph API to ensure session continuity.
|
||||||
* **Thread Safety:** Use `report_lock` when updating the shared error list from worker threads.
|
* **Thread Safety:** Use `report_lock` when updating the shared error list from worker threads.
|
||||||
* **Logging:** Prefer `logger.info()` or `logger.error()` over `print()` to ensure persistence in `sharepoint_download.log`.
|
* **Logging:** Prefer `logger.info()` or `logger.error()` over `print()` to ensure persistence in `sharepoint_download.log`.
|
||||||
* **Integrity:** Always verify file integrity using `size` and `quickXorHash` where available.
|
* **Integrity:** Always verify file integrity using `size` and `quickXorHash` where available.
|
||||||
|
|||||||
@@ -112,22 +112,37 @@ def get_drive_id(app, site_id, drive_name):
|
|||||||
|
|
||||||
# --- Punkt 2: Resume / Chunked Download logic ---
|
# --- Punkt 2: Resume / Chunked Download logic ---
|
||||||
def get_fresh_download_url(app, drive_id, item_id):
|
def get_fresh_download_url(app, drive_id, item_id):
|
||||||
"""Fetches a fresh download URL for a specific item ID with token refresh support."""
|
"""Fetches a fresh download URL for a specific item ID with retries and robust error handling."""
|
||||||
url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{item_id}?$select=id,@microsoft.graph.downloadUrl"
|
url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{item_id}"
|
||||||
|
|
||||||
try:
|
for attempt in range(3):
|
||||||
headers = get_headers(app)
|
try:
|
||||||
response = requests.get(url, headers=headers, timeout=60)
|
headers = get_headers(app)
|
||||||
|
|
||||||
if response.status_code == 401:
|
|
||||||
logger.info("Access Token expired. Forcing refresh...")
|
|
||||||
headers = get_headers(app, force_refresh=True)
|
|
||||||
response = requests.get(url, headers=headers, timeout=60)
|
response = requests.get(url, headers=headers, timeout=60)
|
||||||
|
|
||||||
response.raise_for_status()
|
if response.status_code == 401:
|
||||||
return response.json().get('@microsoft.graph.downloadUrl'), None
|
logger.info(f"Access Token expired during refresh (Attempt {attempt+1}). Forcing refresh...")
|
||||||
except Exception as e:
|
headers = get_headers(app, force_refresh=True)
|
||||||
return None, str(e)
|
response = requests.get(url, headers=headers, timeout=60)
|
||||||
|
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
download_url = data.get('@microsoft.graph.downloadUrl')
|
||||||
|
|
||||||
|
if download_url:
|
||||||
|
return download_url, None
|
||||||
|
|
||||||
|
# If item exists but URL is missing, it might be a transient SharePoint issue
|
||||||
|
logger.warning(f"Attempt {attempt+1}: '@microsoft.graph.downloadUrl' missing for {item_id}. Retrying in 1s...")
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
if attempt == 2:
|
||||||
|
return None, str(e)
|
||||||
|
logger.warning(f"Attempt {attempt+1} failed: {e}. Retrying...")
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
return None, "Item returned but '@microsoft.graph.downloadUrl' was missing after 3 attempts."
|
||||||
|
|
||||||
def download_single_file(app, drive_id, item_id, local_path, expected_size, display_name, remote_hash=None, initial_url=None):
|
def download_single_file(app, drive_id, item_id, local_path, expected_size, display_name, remote_hash=None, initial_url=None):
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user