Resource APIs
Project Management API
Projects in Flow provide a way to organize related samples, executions, and data into cohesive units. This guide covers all aspects of project management through the API.
Understanding Projects
Projects serve as containers for organizing biological studies and experiments.
Project Structure
# A typical project contains:
project = {
    "id": 10,
    "name": "Mouse RNA-seq Time Course",
    "description": "Investigating gene expression changes over time",
    # organism id is a string — looks like an NCBI taxonomy id (10090 = mouse);
    # confirm against the organisms endpoint.
    "organism": {"id": "10090", "name": "Mus musculus"},
    "is_public": False,
    # ISO 8601 timestamps (UTC).
    "created": "2024-01-01T10:00:00Z",
    "modified": "2024-01-15T14:30:00Z",
    "owner": {"id": 1, "username": "researcher"},
    "sample_count": 24,
    "execution_count": 8,
    "total_size": 125_000_000_000,  # bytes (125 GB)
    # Free-form key/value metadata attached by the project owner.
    "metadata": {
        "study_type": "time_course",
        "funding": "NIH R01-12345",
        "publication": "doi:10.1234/journal.2024.001",
        "pi": "Dr. Jane Smith",
        "institution": "University of Science"
    }
}
Creating Projects
Basic Project Creation
# Python client
# Create a project with the minimum useful fields; the returned object
# carries the server-assigned ID.
project = client.create_project(
    name="Cancer Genomics Study",
    organism="human",
    description="Comprehensive genomic analysis of tumor samples"
)
print(f"Created project: {project.id}")
REST API Project Creation
# Create a project over the REST API (POST /projects/new); the JSON body
# mirrors the Python-client arguments above.
curl -X POST https://api.flow.bio/projects/new \
-H "Authorization: Bearer <token>" \
-H "Content-Type: application/json" \
-d '{
"name": "Cancer Genomics Study",
"organism": "human",
"description": "Comprehensive genomic analysis of tumor samples",
"is_public": false
}'
GraphQL Project Creation
# Create a project; $input carries name, organism, description, visibility
# and optional metadata (see the variables example below).
mutation CreateProject($input: CreateProjectInput!) {
  createProject(input: $input) {
    # Server-assigned fields returned for the new project.
    id
    name
    organism {
      id
      name
    }
    created
    owner {
      username
    }
  }
}
# Variables
{
"input": {
"name": "Cancer Genomics Study",
"organism": "9606",
"description": "Comprehensive genomic analysis",
"isPublic": false,
"metadata": {
"study_type": "case_control",
"disease": "breast_cancer"
}
}
}
Create with Full Metadata
# Comprehensive project creation
project = client.create_project(
    name="Multi-omics Integration Study",
    organism="human",
    description="Integrating RNA-seq, ChIP-seq, and ATAC-seq data",
    is_public=False,
    # Arbitrary key/value metadata stored with the project; the keys below
    # are illustrative conventions rather than an enforced schema.
    metadata={
        # Study information
        "study_design": "longitudinal",
        "start_date": "2024-01-01",
        "expected_end_date": "2025-12-31",
        "study_phase": "data_collection",
        # Funding and compliance
        "funding_source": "NIH",
        "grant_number": "R01-GM123456",
        "irb_approval": "IRB-2024-001",
        "data_use_agreement": "DUA-2024-001",
        # Team information
        "principal_investigator": "Dr. Jane Smith",
        "co_investigators": ["Dr. John Doe", "Dr. Alice Johnson"],
        "institution": "University of Science",
        "department": "Genomics",
        # Analysis plan
        "primary_endpoints": ["differential_expression", "pathway_enrichment"],
        "statistical_plan": "DESeq2 for DE, GSEA for pathways",
        "target_sample_size": 100,
        # Keywords for discovery
        "keywords": ["cancer", "multi-omics", "integration", "systems_biology"],
        "therapeutic_area": "oncology"
    }
)
Managing Project Samples
Add Samples to Project
# Add existing samples by ID.
project.add_samples([sample1.id, sample2.id, sample3.id])

# Create four new samples that are attached to the project at creation
# time via the project= argument.
new_samples = [
    client.create_sample(
        name=f"{project.name}_Sample_{index}",
        organism=project.organism.id,
        sample_type="RNA",
        project=project.id
    )
    for index in range(1, 5)
]
Remove Samples from Project
# Remove specific samples
project.remove_samples([sample1.id, sample2.id])
# Remove all samples (careful!) — this strips every sample from the
# project; presumably the samples themselves still exist outside the
# project, but confirm before running this on real data.
all_sample_ids = [s.id for s in project.get_samples()]
project.remove_samples(all_sample_ids)
Transfer Samples Between Projects
def transfer_samples(source_project_id, target_project_id, sample_ids):
    """Transfer samples from one project to another.

    Samples are added to the target *before* being removed from the
    source, so a failure partway through cannot leave a sample belonging
    to neither project. (Assumes a sample may briefly belong to both
    projects — confirm if Flow enforces single-project membership.)

    Args:
        source_project_id: ID of the project currently holding the samples.
        target_project_id: ID of the project to move the samples into.
        sample_ids: List of sample IDs to transfer.

    Returns:
        The number of samples transferred.

    Raises:
        ValueError: If the two projects have different organisms.
    """
    source = client.get_project(source_project_id)
    target = client.get_project(target_project_id)
    # Samples are organism-specific, so both projects must match.
    if source.organism.id != target.organism.id:
        raise ValueError("Projects must have the same organism")
    # Add first, then remove — the safe ordering if either call fails.
    target.add_samples(sample_ids)
    source.remove_samples(sample_ids)
    print(f"Transferred {len(sample_ids)} samples")
    return len(sample_ids)

# Transfer samples
transfer_samples(
    source_project_id=10,
    target_project_id=15,
    sample_ids=[123, 124, 125]
)
Retrieving Projects
Get Single Project
# Python client
project = client.get_project(10)
# Access project details
print(f"Name: {project.name}")
print(f"Samples: {project.sample_count}")
print(f"Executions: {project.execution_count}")
print(f"Total size: {project.total_size / 1e9:.1f} GB")  # total_size is in bytes
REST API Project Retrieval
# Fetch a single project by ID over the REST API.
curl -H "Authorization: Bearer <token>" \
https://api.flow.bio/projects/10
GraphQL Project Query
# Fetch one project with its organism, permissions, and the first page of
# its samples and executions.
query GetProjectDetails($id: ID!) {
  project(id: $id) {
    id
    name
    description
    organism {
      id
      name
      scientificName
    }
    isPublic
    created
    modified
    owner {
      username
      email
    }
    # Aggregate counters for the project.
    sampleCount
    executionCount
    totalSize
    metadata
    # What the calling user may do with this project.
    permissions {
      canEdit
      canShare
      canDelete
    }
    samples(limit: 100) {
      edges {
        node {
          id
          name
          sampleType {
            name
          }
          condition
        }
      }
    }
    executions(limit: 20) {
      edges {
        node {
          id
          name
          pipeline {
            name
          }
          status
        }
      }
    }
  }
}
List Projects
# Get your projects
my_projects = client.get_projects()
# Get shared projects
shared_projects = client.get_shared_projects()
# Get public projects (optionally filtered by organism, paged by limit)
public_projects = client.get_public_projects(
    organism="human",
    limit=50
)
# Search projects by free-text query with sample-count constraints
cancer_projects = client.search_projects(
    query="cancer",
    has_samples=True,
    min_samples=10
)
Updating Projects
Update Project Information
# Update basic properties
project.update(
    name="Updated Project Name",
    description="More detailed description",
    is_public=True
)
# Update metadata — presumably merged into the existing metadata rather
# than replacing it; confirm against the API reference.
project.update_metadata({
    "status": "analysis_complete",
    "publication": "Nature 2024",
    "data_availability": "GEO GSE123456"
})
# Add tags
project.add_tags(["rna-seq", "time-course", "published"])
REST API Project Update
# Update a project's description and metadata (POST /projects/<id>/update).
curl -X POST https://api.flow.bio/projects/10/update \
-H "Authorization: Bearer <token>" \
-H "Content-Type: application/json" \
-d '{
"description": "Updated description",
"metadata": {
"status": "completed",
"publication": "Nature 2024"
}
}'
Batch Update Project Samples
# Update metadata for all project samples
def update_project_sample_metadata(project_id, metadata_updates):
    """Merge *metadata_updates* into every sample of the given project.

    Returns the number of samples that were touched.
    """
    proj = client.get_project(project_id)
    members = proj.get_samples()
    for member in members:
        member.merge_metadata(metadata_updates)
        print(f"Updated {member.name}")
    return len(members)

# Add batch information to all samples
updated = update_project_sample_metadata(
    project_id=10,
    metadata_updates={"batch": "2024_Q1", "platform": "NovaSeq"}
)
Project Organization
Project Hierarchies
# Create related projects
parent_project = client.create_project(
    name="Large Cohort Study",
    organism="human",
    metadata={"project_type": "parent"}
)
# Create sub-projects: the parent/child link is recorded purely in
# metadata ("parent_project_id"), one sub-project per collection site.
sub_projects = []
for site in ["Site_A", "Site_B", "Site_C"]:
    sub_project = client.create_project(
        name=f"{parent_project.name} - {site}",
        organism="human",
        metadata={
            "project_type": "sub_project",
            "parent_project_id": parent_project.id,
            "site": site
        }
    )
    sub_projects.append(sub_project)
Project Templates
# Define project templates
PROJECT_TEMPLATES = {
"rna_seq_study": {
"metadata": {
"study_type": "gene_expression",
"assays": ["RNA-seq"],
"analysis_pipeline": "nf-core/rnaseq",
"min_samples": 3,
"replicates": True
},
"required_sample_metadata": [
"condition", "replicate", "batch"
]
},
"multi_omics_study": {
"metadata": {
"study_type": "integrative",
"assays": ["RNA-seq", "ATAC-seq", "ChIP-seq"],
"integration_method": "MOFA",
"min_samples_per_assay": 10
},
"required_sample_metadata": [
"assay_type", "condition", "timepoint"
]
}
}
def create_project_from_template(name, organism, template_name):
    """Create project using predefined template"""
    spec = PROJECT_TEMPLATES[template_name]
    new_project = client.create_project(
        name=name,
        organism=organism,
        metadata=spec["metadata"]
    )
    # Record which sample-level fields the template treats as mandatory.
    new_project.update_metadata(
        {"required_sample_fields": spec["required_sample_metadata"]}
    )
    return new_project
Project Analytics
Project Statistics
def get_project_statistics(project_id):
    """Get comprehensive project statistics"""
    project = client.get_project(project_id)
    samples = project.get_samples()
    executions = project.get_executions()

    def _tally(bucket, key):
        # Increment a counter-dict entry, creating it on first sight.
        bucket[key] = bucket.get(key, 0) + 1

    stats = {
        "basic": {
            "total_samples": len(samples),
            "total_executions": len(executions),
            "total_size_gb": project.total_size / 1e9
        },
        "by_sample_type": {},
        "by_condition": {},
        "execution_status": {
            "completed": 0,
            "running": 0,
            "failed": 0
        },
        "data_types": {}
    }
    # Tally samples by type, condition, and the types of attached data.
    for sample in samples:
        _tally(stats["by_sample_type"], sample.sample_type.name)
        _tally(stats["by_condition"], sample.condition or "unspecified")
        for data in sample.data:
            _tally(stats["data_types"], data.data_type.name)
    # Count executions per known status; unknown statuses are ignored.
    for execution in executions:
        status = execution.status.lower()
        if status in stats["execution_status"]:
            stats["execution_status"][status] += 1
    return stats
# Generate report
stats = get_project_statistics(10)
print("Project Statistics:")  # fixed: was an f-string with no placeholders
print(f"Total Samples: {stats['basic']['total_samples']}")
print(f"Total Size: {stats['basic']['total_size_gb']:.1f} GB")
print(f"Sample Types: {stats['by_sample_type']}")
Project Timeline
def create_project_timeline(project_id):
    """Create timeline of project activities"""
    project = client.get_project(project_id)

    def _event(when, kind, text):
        # Uniform event record used throughout the timeline.
        return {"date": when, "type": kind, "description": text}

    # Project creation is always the first recorded event.
    events = [_event(project.created, "project_created",
                     f"Project '{project.name}' created")]
    # One event per sample addition.
    for sample in project.get_samples():
        events.append(_event(sample.created, "sample_added",
                             f"Sample '{sample.name}' added"))
    # Start (and, when finished, completion) events per execution.
    for execution in project.get_executions():
        events.append(_event(execution.created, "execution_started",
                             f"Pipeline '{execution.pipeline.name}' started"))
        if execution.completed:
            events.append(_event(execution.completed, "execution_completed",
                                 f"Pipeline '{execution.pipeline.name}' completed"))
    # Chronological order.
    events.sort(key=lambda e: e["date"])
    return events
Project Data Management
Export Project Data
def export_project_manifest(project_id, output_file="manifest.tsv"):
    """Export project manifest with all samples and data"""
    project = client.get_project(project_id)
    header = "sample_id\tsample_name\tcondition\tdata_id\tfilename\tsize\tmd5\n"
    with open(output_file, 'w') as handle:
        handle.write(header)
        # One TSV row per data file, repeating the owning sample's fields.
        for sample in project.get_samples():
            for data in sample.data:
                handle.write(
                    f"{sample.id}\t{sample.name}\t{sample.condition}\t"
                    f"{data.id}\t{data.filename}\t{data.size}\t{data.md5}\n"
                )
    print(f"Exported manifest to {output_file}")
def export_project_metadata(project_id, format="excel"):
    """Export all project metadata.

    Writes "<project name>_metadata.xlsx" containing a "Project" sheet
    (one row of project-level fields) and a "Samples" sheet (one row per
    sample, with the sample's free-form metadata keys as extra columns).

    Args:
        project_id: ID of the project to export.
        format: Export format; only "excel" is currently supported.

    Raises:
        ValueError: If *format* is not supported (previously an unknown
            format was silently ignored and nothing was written).
    """
    if format != "excel":
        raise ValueError(f"Unsupported export format: {format!r}")
    project = client.get_project(project_id)
    import pandas as pd
    # Project info
    project_df = pd.DataFrame([{
        "Project ID": project.id,
        "Name": project.name,
        "Description": project.description,
        "Organism": project.organism.name,
        "Created": project.created,
        "Owner": project.owner.username
    }])
    # Sample info: fixed columns first, then each sample's metadata keys.
    sample_data = []
    for sample in project.get_samples():
        row = {
            "Sample ID": sample.id,
            "Sample Name": sample.name,
            "Type": sample.sample_type.name,
            "Condition": sample.condition
        }
        row.update(sample.metadata)
        sample_data.append(row)
    samples_df = pd.DataFrame(sample_data)
    # Write both sheets into a single workbook.
    with pd.ExcelWriter(f"{project.name}_metadata.xlsx") as writer:
        project_df.to_excel(writer, sheet_name="Project", index=False)
        samples_df.to_excel(writer, sheet_name="Samples", index=False)
Archive Project
def archive_project(project_id, archive_path="/archive"):
    """Archive project data and metadata to a local directory tree.

    Layout: <archive_path>/project_<id>/project.json plus one
    sample_<id>/ directory per sample containing metadata.json and its
    downloaded data files.

    Args:
        project_id: ID of the project to archive.
        archive_path: Root directory for archives (default "/archive").
    """
    import os
    import json
    # Fix: datetime was used below but never imported (NameError at
    # runtime); the unused shutil import has been dropped.
    from datetime import datetime
    project = client.get_project(project_id)
    # Create archive directory
    project_dir = os.path.join(archive_path, f"project_{project.id}")
    os.makedirs(project_dir, exist_ok=True)
    # Save project metadata
    with open(os.path.join(project_dir, "project.json"), 'w') as f:
        json.dump({
            "id": project.id,
            "name": project.name,
            "description": project.description,
            "organism": project.organism.name,
            "metadata": project.metadata,
            "created": str(project.created),
            "archived": str(datetime.now())
        }, f, indent=2)
    # Create sample directories and download data
    for sample in project.get_samples():
        sample_dir = os.path.join(project_dir, f"sample_{sample.id}")
        os.makedirs(sample_dir, exist_ok=True)
        # Save sample metadata
        with open(os.path.join(sample_dir, "metadata.json"), 'w') as f:
            json.dump({
                "id": sample.id,
                "name": sample.name,
                "metadata": sample.metadata
            }, f, indent=2)
        # Download data files
        for data in sample.data:
            output_path = os.path.join(sample_dir, data.filename)
            client.download_file(data.download_url, output_path)
            print(f"Downloaded {data.filename}")
    print(f"Project archived to {project_dir}")
Project Sharing
Share Projects
# Share with users (read-only access)
project.share(
    users=["collaborator1@example.com", "collaborator2@example.com"],
    permission="read"
)
# Share with groups
project.share(
    groups=["bioinformatics-team"],
    permission="edit"
)
# Make project public
project.update(is_public=True)
# Share with specific permissions — per-user and per-group permission
# levels in a single call.
project.share_advanced({
    "users": {
        "analyst@example.com": "read",
        "scientist@example.com": "edit",
        "pi@example.com": "share"
    },
    "groups": {
        "wet-lab": "read",
        "dry-lab": "edit"
    }
})
Project Access Control
def audit_project_access(project_id):
    """Audit who has access to a project.

    Returns a dict with the owner, public flag, the per-user and
    per-group shares, and a rough total of users with access.
    """
    project = client.get_project(project_id)
    access_report = {
        "owner": project.owner.username,
        "is_public": project.is_public,
        "shared_users": [],
        "shared_groups": [],
        "total_users": 0
    }
    # Get directly shared users
    for user in project.shared_with_users:
        access_report["shared_users"].append({
            "username": user.username,
            "permission": user.permission_level
        })
    # Fix: directly shared users were previously not counted in total_users.
    access_report["total_users"] += len(access_report["shared_users"])
    # Get shared groups
    for group in project.shared_with_groups:
        access_report["shared_groups"].append({
            "name": group.name,
            "permission": group.permission_level,
            "member_count": group.member_count
        })
        # NOTE(review): a user who is both directly shared and a group
        # member is counted twice, so total_users is an upper bound.
        access_report["total_users"] += group.member_count
    return access_report
Project Workflows
Standard Project Workflow
def setup_standard_project(
    name,
    organism,
    sample_annotations_file,
    pipeline="RNA-seq"
):
    """Set up a standard analysis project"""
    # 1. Create the project shell.
    project = client.create_project(
        name=name,
        organism=organism,
        metadata={"pipeline": pipeline}
    )
    print(f"Created project: {project.name}")
    # 2. Import samples from the annotation spreadsheet.
    samples = import_samples_from_excel(
        sample_annotations_file,
        project_id=project.id
    )
    print(f"Imported {len(samples)} samples")
    # 3. Validate every imported sample, collecting any problems.
    issues = []
    for imported in samples:
        report = validate_sample(imported)
        if not report["valid"]:
            issues.append({
                "sample": imported.name,
                "issues": report["issues"]
            })
    if issues:
        print(f"Warning: {len(issues)} samples have validation issues")
    # 4. Record the distinct conditions and overall readiness.
    conditions = {s.condition for s in samples if s.condition}
    project.update_metadata({
        "conditions": list(conditions),
        "ready_for_analysis": len(issues) == 0
    })
    return project, samples, issues
Multi-Site Project Coordination
class MultiSiteProject:
    """Coordinate projects across multiple sites"""

    def __init__(self, master_project_name, sites):
        self.sites = sites
        # The umbrella project that ties the per-site projects together.
        self.master_project = client.create_project(
            name=master_project_name,
            organism="human",
            metadata={"project_type": "multi_site", "sites": sites}
        )
        self.site_projects = {}

    def create_site_project(self, site_name):
        """Create project for a specific site"""
        site_project = client.create_project(
            name=f"{self.master_project.name} - {site_name}",
            organism=self.master_project.organism.id,
            # The link back to the master project lives in metadata.
            metadata={
                "master_project_id": self.master_project.id,
                "site": site_name
            }
        )
        self.site_projects[site_name] = site_project
        return site_project

    def aggregate_statistics(self):
        """Aggregate statistics across all sites"""
        totals = {
            "total_samples": 0,
            "total_executions": 0,
            "by_site": {}
        }
        for site_name, site_project in self.site_projects.items():
            per_site = get_project_statistics(site_project.id)
            totals["total_samples"] += per_site["basic"]["total_samples"]
            totals["total_executions"] += per_site["basic"]["total_executions"]
            totals["by_site"][site_name] = per_site
        return totals
Project Lifecycle
Project Phases
# Ordered lifecycle phases a project moves through, from initial planning
# to archival; update_project_phase() validates against this list.
PROJECT_PHASES = [
    "planning",
    "sample_collection",
    "data_generation",
    "quality_control",
    "analysis",
    "interpretation",
    "publication",
    "archived"
]
def update_project_phase(project_id, new_phase):
    """Update project phase and perform phase-specific actions"""
    # Reject anything outside the known lifecycle.
    if new_phase not in PROJECT_PHASES:
        raise ValueError(f"Invalid phase: {new_phase}")
    project = client.get_project(project_id)
    old_phase = project.metadata.get("phase", "planning")
    # Append this transition to the running history and stamp the change.
    history = project.metadata.get("phase_history", [])
    project.update_metadata({
        "phase": new_phase,
        "phase_updated": str(datetime.now()),
        "phase_history": history + [{
            "from": old_phase,
            "to": new_phase,
            "date": str(datetime.now())
        }]
    })
    # Side effects triggered by entering certain phases.
    if new_phase == "quality_control":
        run_project_qc(project_id)
    elif new_phase == "publication":
        prepare_publication_package(project_id)
    elif new_phase == "archived":
        archive_project(project_id)
    print(f"Project phase updated: {old_phase} → {new_phase}")
Project Completion Checklist
def check_project_completion(project_id):
    """Check if project meets completion criteria"""
    project = client.get_project(project_id)
    samples = project.get_samples()
    executions = project.get_executions()
    # Each entry is True when the corresponding criterion is satisfied.
    checklist = {
        "has_samples": len(samples) > 0,
        "all_samples_have_data": all(len(s.data) > 0 for s in samples),
        "has_executions": len(executions) > 0,
        "all_executions_complete": all(
            e.status == "completed" for e in executions
        ),
        "has_metadata": bool(project.metadata),
        "has_description": bool(project.description),
        "qc_complete": project.metadata.get("qc_status") == "passed"
    }
    failed = [name for name, ok in checklist.items() if not ok]
    score = (len(checklist) - len(failed)) / len(checklist)
    return {
        "complete": not failed,
        "score": score,
        "checklist": checklist,
        "missing": failed
    }
Best Practices
1. Project Naming Conventions
def generate_project_name(
    investigator_initials,
    year,
    study_type,
    organism,
    sequence_number
):
    """Generate standardized project names.

    Format: INI_YEAR_TYPE_ORG_NUM, e.g. JS_2024_RNAseq_Hs_001.
    Organisms not in the lookup fall back to their first two letters,
    upper-cased; the sequence number is zero-padded to three digits.
    """
    known_codes = {
        "human": "Hs",
        "mouse": "Mm",
        "rat": "Rn",
        "zebrafish": "Dr"
    }
    code = known_codes.get(organism, organism[:2].upper())
    parts = (
        investigator_initials,
        str(year),
        study_type,
        code,
        f"{sequence_number:03d}"
    )
    return "_".join(parts)
2. Project Documentation
def create_project_readme(project_id):
    """Generate README for project.

    Builds a Markdown document from the project record and the statistics
    returned by get_project_statistics(), and returns it as a string.
    """
    project = client.get_project(project_id)
    stats = get_project_statistics(project_id)
    # NOTE: the template body below is flush-left on purpose — its text is
    # emitted verbatim into the README.
    readme = f"""# {project.name}
## Description
{project.description}
## Project Information
- **ID**: {project.id}
- **Organism**: {project.organism.name}
- **Created**: {project.created}
- **Owner**: {project.owner.username}
- **Status**: {project.metadata.get('phase', 'active')}
## Statistics
- **Total Samples**: {stats['basic']['total_samples']}
- **Total Executions**: {stats['basic']['total_executions']}
- **Total Size**: {stats['basic']['total_size_gb']:.1f} GB
## Sample Types
{chr(10).join(f"- {k}: {v}" for k, v in stats['by_sample_type'].items())}
## Metadata
{chr(10).join(f"- **{k}**: {v}" for k, v in project.metadata.items())}
## Access
- Public: {'Yes' if project.is_public else 'No'}
- Shared with {len(project.shared_with_users)} users and {len(project.shared_with_groups)} groups
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
"""
    return readme
3. Project Validation
def validate_project_structure(project_id):
    """Validate project follows best practices"""
    project = client.get_project(project_id)
    samples = project.get_samples()
    results = {
        "errors": [],
        "warnings": [],
        "info": []
    }
    # Recommended project-level metadata fields.
    for required in ("study_type", "pi", "institution"):
        if required not in project.metadata:
            results["warnings"].append(
                f"Missing recommended metadata field: {required}"
            )
    if samples:
        # All samples in one project should share a single organism.
        distinct_organisms = {s.organism.id for s in samples}
        if len(distinct_organisms) > 1:
            results["errors"].append(
                "Multiple organisms in project"
            )
    # Every replicate group needs at least two members to be useful.
    groups = {s.replicate_group for s in samples if s.replicate_group}
    for group in groups:
        members = [s for s in samples if s.replicate_group == group]
        if len(members) < 2:
            results["warnings"].append(
                f"Replicate group '{group}' has only 1 sample"
            )
    # Flag projects whose samples carry no data files at all.
    attached = [d for s in samples for d in s.data]
    if samples and not attached:
        results["warnings"].append(
            "Project has samples but no data files"
        )
    return results
Next Steps
- Sample Management - Managing samples within projects
- Pipeline Execution - Running analyses on projects
- Search API - Finding and filtering projects
- Permissions Guide - Project sharing and access