Tilføj 429-håndtering, eksponentiel backoff og dybdebegrænsning
- get_fresh_download_url: tilføjer 429-tjek med Retry-After og erstatter fast sleep(1) med eksponentiel backoff (2^attempt sekunder)
- process_item_list: tilføjer MAX_FOLDER_DEPTH=50 guard mod RecursionError ved unormalt dybe SharePoint-mappestrukturer
- README og CLAUDE.md opdateret med beskrivelse af den nye adfærd

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,7 @@ from urllib.parse import urlparse, quote
|
||||
MAX_WORKERS = 5
|
||||
MAX_RETRIES = 5
|
||||
CHUNK_SIZE = 1024 * 1024 # 1MB Chunks
|
||||
MAX_FOLDER_DEPTH = 50
|
||||
LOG_FILE = "sharepoint_download.log"
|
||||
|
||||
# Setup Logging
|
||||
@@ -145,28 +146,34 @@ def get_fresh_download_url(app, drive_id, item_id):
|
||||
try:
|
||||
headers = get_headers(app)
|
||||
response = requests.get(url, headers=headers, timeout=60)
|
||||
|
||||
|
||||
if response.status_code == 429:
|
||||
retry_after = int(response.headers.get("Retry-After", 2 ** attempt))
|
||||
logger.warning(f"Throttled (429) in get_fresh_download_url. Waiting {retry_after}s...")
|
||||
time.sleep(retry_after)
|
||||
continue
|
||||
|
||||
if response.status_code == 401:
|
||||
logger.info(f"Access Token expired during refresh (Attempt {attempt+1}). Forcing refresh...")
|
||||
headers = get_headers(app, force_refresh=True)
|
||||
response = requests.get(url, headers=headers, timeout=60)
|
||||
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
download_url = data.get('@microsoft.graph.downloadUrl')
|
||||
|
||||
|
||||
if download_url:
|
||||
return download_url, None
|
||||
|
||||
|
||||
# If item exists but URL is missing, it might be a transient SharePoint issue
|
||||
logger.warning(f"Attempt {attempt+1}: '@microsoft.graph.downloadUrl' missing for {item_id}. Retrying in 1s...")
|
||||
time.sleep(1)
|
||||
|
||||
logger.warning(f"Attempt {attempt+1}: '@microsoft.graph.downloadUrl' missing for {item_id}. Retrying in {2 ** attempt}s...")
|
||||
time.sleep(2 ** attempt)
|
||||
|
||||
except Exception as e:
|
||||
if attempt == 2:
|
||||
return None, str(e)
|
||||
logger.warning(f"Attempt {attempt+1} failed: {e}. Retrying...")
|
||||
time.sleep(1)
|
||||
logger.warning(f"Attempt {attempt+1} failed: {e}. Retrying in {2 ** attempt}s...")
|
||||
time.sleep(2 ** attempt)
|
||||
|
||||
return None, "Item returned but '@microsoft.graph.downloadUrl' was missing after 3 attempts."
|
||||
|
||||
@@ -236,7 +243,10 @@ def download_single_file(app, drive_id, item_id, local_path, expected_size, disp
|
||||
return False, str(e)
|
||||
|
||||
# --- Main Traversal Logic ---
|
||||
def process_item_list(app, drive_id, item_path, local_root_path, report, executor, futures):
|
||||
def process_item_list(app, drive_id, item_path, local_root_path, report, executor, futures, depth=0):
|
||||
if depth >= MAX_FOLDER_DEPTH:
|
||||
logger.warning(f"Max folder depth ({MAX_FOLDER_DEPTH}) reached at: {item_path}. Skipping subtree.")
|
||||
return
|
||||
try:
|
||||
auth_headers = get_headers(app)
|
||||
encoded_path = quote(item_path)
|
||||
@@ -257,7 +267,7 @@ def process_item_list(app, drive_id, item_path, local_root_path, report, executo
|
||||
display_path = f"{item_path}/{item_name}".strip('/')
|
||||
|
||||
if 'folder' in item:
|
||||
process_item_list(app, drive_id, display_path, local_path, report, executor, futures)
|
||||
process_item_list(app, drive_id, display_path, local_path, report, executor, futures, depth + 1)
|
||||
elif 'file' in item:
|
||||
item_id = item['id']
|
||||
download_url = item.get('@microsoft.graph.downloadUrl')
|
||||
|
||||
Reference in New Issue
Block a user