Implementer token refresh og skip-logik for eksisterende filer

This commit is contained in:
Martin Tranberg
2026-03-26 10:41:01 +01:00
parent 1eb5218624
commit eca83031b1

View File

@@ -1,6 +1,7 @@
import os import os
import csv import csv
import requests import requests
import time
from datetime import datetime from datetime import datetime
from msal import ConfidentialClientApplication from msal import ConfidentialClientApplication
from urllib.parse import urlparse, quote from urllib.parse import urlparse, quote
@@ -14,19 +15,24 @@ def load_config(file_path):
config[key.strip()] = value.strip().strip('"') config[key.strip()] = value.strip().strip('"')
return config return config
def create_msal_app(tenant_id, client_id, client_secret):
    """Build a confidential-client MSAL application for the given tenant.

    The returned app object holds MSAL's internal token cache, so callers
    can repeatedly ask it for tokens without re-authenticating each time.
    """
    authority_url = f"https://login.microsoftonline.com/{tenant_id}"
    return ConfidentialClientApplication(
        client_id,
        authority=authority_url,
        client_credential=client_secret,
    )
result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
def get_headers(app):
    """Return Authorization headers for Graph API calls.

    acquire_token_for_client transparently serves a cached token and only
    performs a network round-trip when the cached one has expired, so this
    is cheap to call before every request batch.

    Raises:
        Exception: if MSAL could not acquire a token.
    """
    result = app.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )
    if "access_token" not in result:
        raise Exception(f"Could not acquire token: {result.get('error_description')}")
    return {'Authorization': f'Bearer {result["access_token"]}'}
def get_site_id(headers, site_url): def get_site_id(app, site_url):
headers = get_headers(app)
parsed = urlparse(site_url) parsed = urlparse(site_url)
hostname = parsed.netloc hostname = parsed.netloc
site_path = parsed.path site_path = parsed.path
@@ -35,7 +41,8 @@ def get_site_id(headers, site_url):
response.raise_for_status() response.raise_for_status()
return response.json()['id'] return response.json()['id']
def get_drive_id(headers, site_id, drive_name): def get_drive_id(app, site_id, drive_name):
headers = get_headers(app)
url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives" url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
response.raise_for_status() response.raise_for_status()
@@ -45,8 +52,14 @@ def get_drive_id(headers, site_id, drive_name):
return drive['id'] return drive['id']
raise Exception(f"Drive '{drive_name}' not found in site.") raise Exception(f"Drive '{drive_name}' not found in site.")
def download_file(headers, download_url, local_path, expected_size): def download_file(download_url, local_path, expected_size):
try: try:
# Check if file exists and size matches
if os.path.exists(local_path):
local_size = os.path.getsize(local_path)
if int(local_size) == int(expected_size):
return True, "Skipped"
os.makedirs(os.path.dirname(local_path), exist_ok=True) os.makedirs(os.path.dirname(local_path), exist_ok=True)
response = requests.get(download_url, stream=True) response = requests.get(download_url, stream=True)
response.raise_for_status() response.raise_for_status()
@@ -54,23 +67,23 @@ def download_file(headers, download_url, local_path, expected_size):
for chunk in response.iter_content(chunk_size=8192): for chunk in response.iter_content(chunk_size=8192):
f.write(chunk) f.write(chunk)
# Verify size # Verify size after download
local_size = os.path.getsize(local_path) local_size = os.path.getsize(local_path)
if int(local_size) == int(expected_size): if int(local_size) == int(expected_size):
return True, None return True, "Downloaded"
else: else:
return False, f"Size mismatch: Remote={expected_size}, Local={local_size}" return False, f"Size mismatch after download: Remote={expected_size}, Local={local_size}"
except Exception as e: except Exception as e:
return False, str(e) return False, str(e)
def download_folder_recursive(headers, drive_id, item_path, local_root_path, report): def download_folder_recursive(app, drive_id, item_path, local_root_path, report):
try: try:
# Get children of the folder # Get fresh headers for this folder batch to ensure token is valid
# Path must be encoded correctly headers = get_headers(app)
encoded_path = quote(item_path) encoded_path = quote(item_path)
url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{encoded_path}:/children" url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{encoded_path}:/children"
# If item_path is empty, use root
if not item_path: if not item_path:
url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children" url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children"
@@ -83,13 +96,11 @@ def download_folder_recursive(headers, drive_id, item_path, local_root_path, rep
local_path = os.path.join(local_root_path, item_name) local_path = os.path.join(local_root_path, item_name)
if 'folder' in item: if 'folder' in item:
# Recursive call
sub_item_path = f"{item_path}/{item_name}".strip('/') sub_item_path = f"{item_path}/{item_name}".strip('/')
download_folder_recursive(headers, drive_id, sub_item_path, local_path, report) download_folder_recursive(app, drive_id, sub_item_path, local_path, report)
elif 'file' in item: elif 'file' in item:
download_url = item.get('@microsoft.graph.downloadUrl') download_url = item.get('@microsoft.graph.downloadUrl')
if not download_url: if not download_url:
# Fallback or error
report.append({ report.append({
"Path": f"{item_path}/{item_name}", "Path": f"{item_path}/{item_name}",
"Error": "No download URL available", "Error": "No download URL available",
@@ -97,12 +108,17 @@ def download_folder_recursive(headers, drive_id, item_path, local_root_path, rep
}) })
continue continue
print(f"Downloading: {item_path}/{item_name}...") success, status = download_file(download_url, local_path, item['size'])
success, error_msg = download_file(headers, download_url, local_path, item['size']) if success:
if not success: if status == "Downloaded":
print(f"Downloaded: {item_path}/{item_name}")
else:
print(f"Skipped (matches local): {item_path}/{item_name}")
else:
print(f"FAILED: {item_path}/{item_name} - {status}")
report.append({ report.append({
"Path": f"{item_path}/{item_name}", "Path": f"{item_path}/{item_name}",
"Error": error_msg, "Error": status,
"Timestamp": datetime.now().isoformat() "Timestamp": datetime.now().isoformat()
}) })
except Exception as e: except Exception as e:
@@ -122,26 +138,28 @@ def main():
folders_to_download_str = config.get('FOLDERS_TO_DOWNLOAD') folders_to_download_str = config.get('FOLDERS_TO_DOWNLOAD')
local_path_base = config.get('LOCAL_PATH').replace('\\', os.sep) local_path_base = config.get('LOCAL_PATH').replace('\\', os.sep)
folders_to_download = [f.strip() for f in folders_to_download_str.split(',')] folders_to_download = [f.strip() for f in folders_str.split(',')] if 'FOLDERS_TO_DOWNLOAD' in config else []
# Safeguard if FOLDERS_TO_DOWNLOAD is missing
print(f"Connecting via Graph API...") if not folders_to_download:
folders_to_download = [f.strip() for f in folders_to_download_str.split(',')]
print(f"Connecting via Graph API (with auto-refresh and skip logic)...")
report = [] report = []
try: try:
token = get_access_token(tenant_id, client_id, client_secret) app = create_msal_app(tenant_id, client_id, client_secret)
headers = {'Authorization': f'Bearer {token}'}
print("Getting Site ID...") print("Getting Site ID...")
site_id = get_site_id(headers, site_url) site_id = get_site_id(app, site_url)
print(f"Getting Drive ID for '{drive_name}'...") print(f"Getting Drive ID for '{drive_name}'...")
drive_id = get_drive_id(headers, site_id, drive_name) drive_id = get_drive_id(app, site_id, drive_name)
for folder in folders_to_download: for folder in folders_to_download:
print(f"\nProcessing folder: {folder}") print(f"\nProcessing folder: {folder}")
local_folder_path = os.path.join(local_path_base, folder) local_folder_path = os.path.join(local_path_base, folder)
download_folder_recursive(headers, drive_id, folder, local_folder_path, report) download_folder_recursive(app, drive_id, folder, local_folder_path, report)
except Exception as e: except Exception as e:
print(f"Critical error: {e}") print(f"Critical error: {e}")
@@ -151,7 +169,6 @@ def main():
"Timestamp": datetime.now().isoformat() "Timestamp": datetime.now().isoformat()
}) })
# Generate Report
report_file = f"download_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" report_file = f"download_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
with open(report_file, 'w', newline='', encoding='utf-8') as f: with open(report_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=["Path", "Error", "Timestamp"]) writer = csv.DictWriter(f, fieldnames=["Path", "Error", "Timestamp"])