diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..4dfac01 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,49 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +A Python utility that synchronizes SharePoint Online folders to local storage using the Microsoft Graph API. It offers both a CLI (`download_sharepoint.py`) and a modern GUI (`sharepoint_gui.py`). + +## Running the Tool + +```bash +# Install dependencies +pip install -r requirements.txt + +# GUI mode (recommended for interactive use) +python sharepoint_gui.py + +# CLI mode (for automation/scripting) +python download_sharepoint.py +``` + +Configuration is read from `connection_info.txt` (gitignored — copy from `connection_info.template.txt` and fill in credentials). + +## Architecture + +Two-file structure with clear separation of concerns: + +**`download_sharepoint.py`** — Core engine with four logical layers: +1. **Authentication** — MSAL `ConfidentialClientApplication` using OAuth 2.0 Client Credentials flow. Tokens are refreshed via `get_headers(app, force_refresh=True)` when a 401 is received. +2. **Graph API navigation** — `get_site_id()` → `get_drive_id()` → `process_item_list()` (recursive, handles `@odata.nextLink` pagination). +3. **Download & resilience** — `download_single_file()` with Range header support for resumable downloads. `get_fresh_download_url()` handles expired pre-signed URLs. The `@retry_request` decorator provides exponential backoff (up to 5 retries, 2^n seconds) for 429s and network errors. +4. **Concurrency** — `ThreadPoolExecutor` (max 5 workers). A `report_lock` guards the shared error list. A `stop_event` allows the GUI stop button to cancel in-flight work. 
+ +**`sharepoint_gui.py`** — CustomTkinter wrapper that: +- Persists settings to a local JSON file +- Spawns the core engine in a background thread +- Patches `requests.get` to route through the GUI's log display +- Provides a folder browser for `LOCAL_PATH` + +## Key Behaviors to Preserve + +- **Self-healing sessions**: On 401, the code refreshes both the MSAL access token *and* the pre-signed Graph download URL before retrying — these are two separate expiry mechanisms. +- **Resumable downloads**: Files are downloaded in 1 MB chunks using HTTP Range headers. Existing files are skipped if their size matches; partial files are resumed from the last byte. +- **Stop signal**: `stop_event.is_set()` is checked in the download loop and recursive traversal — any new code that loops must respect this. + +## Output + +- `sharepoint_download.log` — Full operation log +- `download_report_YYYYMMDD_HHMMSS.csv` — Per-run error report (gitignored) diff --git a/download_sharepoint.py b/download_sharepoint.py index 754f249..a00f475 100644 --- a/download_sharepoint.py +++ b/download_sharepoint.py @@ -91,7 +91,7 @@ def get_headers(app, force_refresh=False): if force_refresh or not result or "access_token" not in result: logger.info("Refreshing Access Token...") - result = app.acquire_token_for_client(scopes=scopes) + result = app.acquire_token_for_client(scopes=scopes, force_refresh=True) if "access_token" in result: return {'Authorization': f'Bearer {result["access_token"]}'} @@ -182,7 +182,7 @@ def download_single_file(app, drive_id, item_id, local_path, expected_size, disp if not download_url: return False, f"Failed to refresh download URL: {err}" # Retry download with new URL - response = requests.get(download_url, headers=resume_header, stream=True, timeout=120) + response = safe_get(download_url, resume_header, stream=True, timeout=120) response.raise_for_status() @@ -291,8 +291,9 @@ def main(): report_file = f"download_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" 
with open(report_file, 'w', newline='', encoding='utf-8') as f: - csv.DictWriter(f, fieldnames=["Path", "Error", "Timestamp"]).writeheader() - csv.DictWriter(f, fieldnames=["Path", "Error", "Timestamp"]).writerows(report) + writer = csv.DictWriter(f, fieldnames=["Path", "Error", "Timestamp"]) + writer.writeheader() + writer.writerows(report) logger.info(f"Sync complete. Errors: {len(report)}. Report: {report_file}")