def load_config(file_path):
    """Read KEY=VALUE pairs from a plain-text config file into a dict.

    Blank lines, lines without '=', and comment lines starting with '#'
    are ignored. Surrounding double quotes on values are stripped.

    :param file_path: path to the config file (e.g. connection_info.txt)
    :return: dict mapping config keys to string values
    """
    config = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            # Skip blanks and comments so the config file can be annotated;
            # previously a line like '# FOO=1' produced a bogus '# FOO' key.
            if not stripped or stripped.startswith('#'):
                continue
            if '=' in stripped:
                key, value = stripped.split('=', 1)
                config[key.strip()] = value.strip().strip('"')
    return config


def get_access_token(tenant_id, client_id, client_secret):
    """Acquire an app-only OAuth2 access token for Microsoft Graph via MSAL.

    Uses the client-credentials flow with the '.default' scope.

    :raises Exception: if MSAL does not return an access token, with the
        error description from the token endpoint.
    """
    app = ConfidentialClientApplication(
        client_id,
        authority=f"https://login.microsoftonline.com/{tenant_id}",
        client_credential=client_secret,
    )
    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
    if "access_token" in result:
        return result["access_token"]
    raise Exception(f"Could not acquire token: {result.get('error_description')}")


def get_site_id(headers, site_url):
    """Resolve a SharePoint site URL to its Graph site id.

    :param headers: dict holding the 'Authorization: Bearer ...' header
    :param site_url: full site URL, e.g. https://tenant.sharepoint.com/sites/X
    :return: the Graph site id string
    :raises requests.HTTPError: on a non-2xx response
    """
    parsed = urlparse(site_url)
    hostname = parsed.netloc
    site_path = parsed.path
    url = f"https://graph.microsoft.com/v1.0/sites/{hostname}:{site_path}"
    # Explicit timeout: requests has no default and would hang indefinitely
    # on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()['id']
def _get_all_pages(headers, url):
    """Return every item from a paginated Graph collection endpoint.

    Follows '@odata.nextLink' until exhausted. The previous single-request
    approach silently truncated listings longer than one page (Graph pages
    at ~200 items), so large folders lost files without any error.
    """
    items = []
    while url:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        items.extend(data.get('value', []))
        url = data.get('@odata.nextLink')
    return items


def get_drive_id(headers, site_id, drive_name):
    """Return the id of the document library (drive) named drive_name.

    :raises Exception: if no drive with that name exists in the site.
    """
    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
    for drive in _get_all_pages(headers, url):
        if drive['name'] == drive_name:
            return drive['id']
    raise Exception(f"Drive '{drive_name}' not found in site.")


def download_file(headers, download_url, local_path, expected_size):
    """Stream one file to local_path and verify the byte count.

    :param headers: kept for interface compatibility; download_url is a
        pre-authenticated Graph URL so the GET itself needs no auth header.
    :param expected_size: remote size reported by Graph, used as a
        post-download integrity check.
    :return: (True, None) on success, (False, error_message) on failure.
        Never raises — errors are reported so one bad file does not
        abort the whole run.
    """
    try:
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        # Timeout prevents a stalled transfer from hanging the run forever.
        response = requests.get(download_url, stream=True, timeout=60)
        response.raise_for_status()
        with open(local_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        # Verify size
        local_size = os.path.getsize(local_path)
        if int(local_size) == int(expected_size):
            return True, None
        return False, f"Size mismatch: Remote={expected_size}, Local={local_size}"
    except Exception as e:
        return False, str(e)


def download_folder_recursive(headers, drive_id, item_path, local_root_path, report):
    """Download a drive folder tree to local_root_path, recursing into subfolders.

    Failures (missing download URL, size mismatch, HTTP errors) are appended
    to `report` as dicts with 'Path', 'Error' and 'Timestamp' keys instead of
    raising, so the traversal continues past individual problems.
    """
    try:
        # Path segment must be URL-encoded; an empty path means the drive root,
        # which uses a different endpoint form.
        if item_path:
            encoded_path = quote(item_path)
            url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{encoded_path}:/children"
        else:
            url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children"

        for item in _get_all_pages(headers, url):
            item_name = item['name']
            local_path = os.path.join(local_root_path, item_name)

            if 'folder' in item:
                # Recurse into the subfolder.
                sub_item_path = f"{item_path}/{item_name}".strip('/')
                download_folder_recursive(headers, drive_id, sub_item_path, local_path, report)
            elif 'file' in item:
                download_url = item.get('@microsoft.graph.downloadUrl')
                if not download_url:
                    report.append({
                        "Path": f"{item_path}/{item_name}",
                        "Error": "No download URL available",
                        "Timestamp": datetime.now().isoformat()
                    })
                    continue

                print(f"Downloading: {item_path}/{item_name}...")
                success, error_msg = download_file(headers, download_url, local_path, item['size'])
                if not success:
                    report.append({
                        "Path": f"{item_path}/{item_name}",
                        "Error": error_msg,
                        "Timestamp": datetime.now().isoformat()
                    })
    except Exception as e:
        report.append({
            "Path": item_path,
            "Error": f"Folder processing error: {str(e)}",
            "Timestamp": datetime.now().isoformat()
        })


def main():
    """Entry point: authenticate, download configured folders, write a CSV error report.

    Reads connection settings from connection_info.txt; writes
    download_report_<timestamp>.csv listing any failures.
    """
    config = load_config('connection_info.txt')

    # Fail fast with a clear message instead of an AttributeError deep below
    # when a key is missing from the config file.
    required = ['TENANT_ID', 'CLIENT_ID', 'CLIENT_SECRET', 'SITE_URL',
                'DOCUMENT_LIBRARY', 'FOLDERS_TO_DOWNLOAD', 'LOCAL_PATH']
    missing = [key for key in required if not config.get(key)]
    if missing:
        raise SystemExit(f"Missing config keys in connection_info.txt: {', '.join(missing)}")

    tenant_id = config.get('TENANT_ID')
    client_id = config.get('CLIENT_ID')
    client_secret = config.get('CLIENT_SECRET')
    site_url = config.get('SITE_URL')
    drive_name = config.get('DOCUMENT_LIBRARY')
    folders_to_download_str = config.get('FOLDERS_TO_DOWNLOAD')
    local_path_base = config.get('LOCAL_PATH').replace('\\', os.sep)

    folders_to_download = [f.strip() for f in folders_to_download_str.split(',')]

    print(f"Connecting via Graph API...")

    report = []

    try:
        token = get_access_token(tenant_id, client_id, client_secret)
        headers = {'Authorization': f'Bearer {token}'}

        print("Getting Site ID...")
        site_id = get_site_id(headers, site_url)

        print(f"Getting Drive ID for '{drive_name}'...")
        drive_id = get_drive_id(headers, site_id, drive_name)

        for folder in folders_to_download:
            print(f"\nProcessing folder: {folder}")
            local_folder_path = os.path.join(local_path_base, folder)
            download_folder_recursive(headers, drive_id, folder, local_folder_path, report)

    except Exception as e:
        print(f"Critical error: {e}")
        report.append({
            "Path": "GENERAL",
            "Error": str(e),
            "Timestamp": datetime.now().isoformat()
        })

    # Generate Report
    report_file = f"download_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    with open(report_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=["Path", "Error", "Timestamp"])
        writer.writeheader()
        writer.writerows(report)

    print(f"\nProcess complete.")
    if report:
        print(f"Errors found: {len(report)}. See {report_file} for details.")
    else:
        print("All downloads successful.")

if __name__ == "__main__":
    main()