Project data management phase 1. Files can be downloaded to server and downloaded locally.

This commit is contained in:
serversdwn
2026-01-16 07:39:22 +00:00
parent 54754e2279
commit 6c7ce5aad0
9 changed files with 783 additions and 271 deletions

View File

@@ -522,51 +522,6 @@ async def get_project_sessions(
})
@router.get("/{project_id}/files", response_class=HTMLResponse)
async def get_project_files(
project_id: str,
request: Request,
db: Session = Depends(get_db),
file_type: Optional[str] = Query(None),
):
"""
Get all data files from all sessions in this project.
Returns HTML partial with file list.
Optional file_type filter: audio, data, log, etc.
"""
from backend.models import DataFile
# Join through RecordingSession to get project files
query = db.query(DataFile).join(
RecordingSession,
DataFile.session_id == RecordingSession.id
).filter(RecordingSession.project_id == project_id)
# Filter by file type if provided
if file_type:
query = query.filter(DataFile.file_type == file_type)
files = query.order_by(DataFile.created_at.desc()).all()
# Enrich with session details
files_data = []
for file in files:
session = None
if file.session_id:
session = db.query(RecordingSession).filter_by(id=file.session_id).first()
files_data.append({
"file": file,
"session": session,
})
return templates.TemplateResponse("partials/projects/file_list.html", {
"request": request,
"project_id": project_id,
"files": files_data,
})
@router.get("/{project_id}/ftp-browser", response_class=HTMLResponse)
async def get_ftp_browser(
project_id: str,
@@ -649,10 +604,11 @@ async def ftp_download_to_server(
project_id=project_id,
location_id=location_id,
unit_id=unit_id,
session_type="sound", # SLMs are sound monitoring devices
status="completed",
started_at=datetime.utcnow(),
stopped_at=datetime.utcnow(),
notes="Auto-created for FTP download"
session_metadata='{"source": "ftp_download", "note": "Auto-created for FTP download"}'
)
db.add(session)
db.commit()
@@ -680,12 +636,35 @@ async def ftp_download_to_server(
# Determine file type from extension
ext = os.path.splitext(filename)[1].lower()
file_type_map = {
# Audio files
'.wav': 'audio',
'.mp3': 'audio',
'.flac': 'audio',
'.m4a': 'audio',
'.aac': 'audio',
# Data files
'.csv': 'data',
'.txt': 'data',
'.log': 'log',
'.json': 'data',
'.xml': 'data',
'.dat': 'data',
# Log files
'.log': 'log',
# Archives
'.zip': 'archive',
'.tar': 'archive',
'.gz': 'archive',
'.7z': 'archive',
'.rar': 'archive',
# Images
'.jpg': 'image',
'.jpeg': 'image',
'.png': 'image',
'.gif': 'image',
# Documents
'.pdf': 'document',
'.doc': 'document',
'.docx': 'document',
}
file_type = file_type_map.get(ext, 'data')
@@ -751,12 +730,15 @@ async def ftp_download_folder_to_server(
db: Session = Depends(get_db),
):
"""
Download an entire folder from an SLM to the server via FTP as a ZIP file.
Creates a DataFile record and stores the ZIP in data/Projects/{project_id}/
Download an entire folder from an SLM to the server via FTP.
Extracts all files from the ZIP and preserves folder structure.
Creates individual DataFile records for each file.
"""
import httpx
import os
import hashlib
import zipfile
import io
from pathlib import Path
from backend.models import DataFile
@@ -785,16 +767,17 @@ async def ftp_download_folder_to_server(
project_id=project_id,
location_id=location_id,
unit_id=unit_id,
session_type="sound", # SLMs are sound monitoring devices
status="completed",
started_at=datetime.utcnow(),
stopped_at=datetime.utcnow(),
notes="Auto-created for FTP folder download"
session_metadata='{"source": "ftp_folder_download", "note": "Auto-created for FTP folder download"}'
)
db.add(session)
db.commit()
db.refresh(session)
# Download folder from SLMM
# Download folder from SLMM (returns ZIP)
SLMM_BASE_URL = os.getenv("SLMM_BASE_URL", "http://localhost:8100")
try:
@@ -812,49 +795,93 @@ async def ftp_download_folder_to_server(
# Extract folder name from remote_path
folder_name = os.path.basename(remote_path.rstrip('/'))
filename = f"{folder_name}.zip"
# Create directory structure: data/Projects/{project_id}/{session_id}/
project_dir = Path(f"data/Projects/{project_id}/{session.id}")
project_dir.mkdir(parents=True, exist_ok=True)
# Create base directory: data/Projects/{project_id}/{session_id}/{folder_name}/
base_dir = Path(f"data/Projects/{project_id}/{session.id}/{folder_name}")
base_dir.mkdir(parents=True, exist_ok=True)
# Save ZIP file to disk
file_path = project_dir / filename
file_content = response.content
# Extract ZIP and save individual files
zip_content = response.content
created_files = []
total_size = 0
with open(file_path, 'wb') as f:
f.write(file_content)
# File type mapping for classification
file_type_map = {
# Audio files
'.wav': 'audio', '.mp3': 'audio', '.flac': 'audio', '.m4a': 'audio', '.aac': 'audio',
# Data files
'.csv': 'data', '.txt': 'data', '.json': 'data', '.xml': 'data', '.dat': 'data',
# Log files
'.log': 'log',
# Archives
'.zip': 'archive', '.tar': 'archive', '.gz': 'archive', '.7z': 'archive', '.rar': 'archive',
# Images
'.jpg': 'image', '.jpeg': 'image', '.png': 'image', '.gif': 'image',
# Documents
'.pdf': 'document', '.doc': 'document', '.docx': 'document',
}
# Calculate checksum
checksum = hashlib.sha256(file_content).hexdigest()
with zipfile.ZipFile(io.BytesIO(zip_content)) as zf:
for zip_info in zf.filelist:
# Skip directories
if zip_info.is_dir():
continue
# Create DataFile record
data_file = DataFile(
id=str(uuid.uuid4()),
session_id=session.id,
file_path=str(file_path.relative_to("data")), # Store relative to data/
file_type='archive', # ZIP archives
file_size_bytes=len(file_content),
downloaded_at=datetime.utcnow(),
checksum=checksum,
file_metadata=json.dumps({
"source": "ftp_folder",
"remote_path": remote_path,
"unit_id": unit_id,
"location_id": location_id,
"folder_name": folder_name,
})
)
# Read file from ZIP
file_data = zf.read(zip_info.filename)
# Determine file path (preserve structure within folder)
# zip_info.filename might be like "Auto_0001/measurement.wav"
file_path = base_dir / zip_info.filename
file_path.parent.mkdir(parents=True, exist_ok=True)
# Write file to disk
with open(file_path, 'wb') as f:
f.write(file_data)
# Calculate checksum
checksum = hashlib.sha256(file_data).hexdigest()
# Determine file type
ext = os.path.splitext(zip_info.filename)[1].lower()
file_type = file_type_map.get(ext, 'data')
# Create DataFile record
data_file = DataFile(
id=str(uuid.uuid4()),
session_id=session.id,
file_path=str(file_path.relative_to("data")),
file_type=file_type,
file_size_bytes=len(file_data),
downloaded_at=datetime.utcnow(),
checksum=checksum,
file_metadata=json.dumps({
"source": "ftp_folder",
"remote_path": remote_path,
"unit_id": unit_id,
"location_id": location_id,
"folder_name": folder_name,
"relative_path": zip_info.filename,
})
)
db.add(data_file)
created_files.append({
"filename": zip_info.filename,
"size": len(file_data),
"type": file_type
})
total_size += len(file_data)
db.add(data_file)
db.commit()
return {
"success": True,
"message": f"Downloaded folder {folder_name} to server as ZIP",
"file_id": data_file.id,
"file_path": str(file_path),
"file_size": len(file_content),
"message": f"Downloaded folder {folder_name} with {len(created_files)} files",
"folder_name": folder_name,
"file_count": len(created_files),
"total_size": total_size,
"files": created_files,
}
except httpx.TimeoutException:
@@ -862,6 +889,11 @@ async def ftp_download_folder_to_server(
status_code=504,
detail="Timeout downloading folder from SLM (large folders may take a while)"
)
except zipfile.BadZipFile:
raise HTTPException(
status_code=500,
detail="Downloaded file is not a valid ZIP archive"
)
except Exception as e:
logger.error(f"Error downloading folder to server: {e}")
raise HTTPException(
@@ -874,6 +906,121 @@ async def ftp_download_folder_to_server(
# Project Types
# ============================================================================
@router.get("/{project_id}/files-unified", response_class=HTMLResponse)
async def get_unified_files(
project_id: str,
request: Request,
db: Session = Depends(get_db),
):
"""
Get unified view of all files in this project.
Groups files by recording session with full metadata.
Returns HTML partial with hierarchical file listing.
"""
from backend.models import DataFile
from pathlib import Path
import json
# Get all sessions for this project
sessions = db.query(RecordingSession).filter_by(
project_id=project_id
).order_by(RecordingSession.started_at.desc()).all()
sessions_data = []
for session in sessions:
# Get files for this session
files = db.query(DataFile).filter_by(session_id=session.id).all()
# Skip sessions with no files
if not files:
continue
# Get session context
unit = None
location = None
if session.unit_id:
unit = db.query(RosterUnit).filter_by(id=session.unit_id).first()
if session.location_id:
location = db.query(MonitoringLocation).filter_by(id=session.location_id).first()
files_data = []
for file in files:
# Check if file exists on disk
file_path = Path("data") / file.file_path
exists_on_disk = file_path.exists()
# Get actual file size if exists
actual_size = file_path.stat().st_size if exists_on_disk else None
# Parse metadata JSON
metadata = {}
try:
if file.file_metadata:
metadata = json.loads(file.file_metadata)
except Exception as e:
logger.warning(f"Failed to parse metadata for file {file.id}: {e}")
files_data.append({
"file": file,
"exists_on_disk": exists_on_disk,
"actual_size": actual_size,
"metadata": metadata,
})
sessions_data.append({
"session": session,
"unit": unit,
"location": location,
"files": files_data,
})
return templates.TemplateResponse("partials/projects/unified_files.html", {
"request": request,
"project_id": project_id,
"sessions": sessions_data,
})
@router.get("/{project_id}/files/{file_id}/download")
async def download_project_file(
project_id: str,
file_id: str,
db: Session = Depends(get_db),
):
"""
Download a data file from a project.
Returns the file for download.
"""
from backend.models import DataFile
from fastapi.responses import FileResponse
from pathlib import Path
# Get the file record
file_record = db.query(DataFile).filter_by(id=file_id).first()
if not file_record:
raise HTTPException(status_code=404, detail="File not found")
# Verify file belongs to this project
session = db.query(RecordingSession).filter_by(id=file_record.session_id).first()
if not session or session.project_id != project_id:
raise HTTPException(status_code=403, detail="File does not belong to this project")
# Build full file path
file_path = Path("data") / file_record.file_path
if not file_path.exists():
raise HTTPException(status_code=404, detail="File not found on disk")
# Extract filename for download
filename = file_path.name
return FileResponse(
path=str(file_path),
filename=filename,
media_type="application/octet-stream"
)
@router.get("/types/list", response_class=HTMLResponse)
async def get_project_types(request: Request, db: Session = Depends(get_db)):
"""