Resource APIs
Project Management API
Projects in Flow provide a way to organize related samples, executions, and data into cohesive units. This guide covers all aspects of project management through the API.
Understanding Projects
Projects serve as containers for organizing biological studies and experiments.
Project Structure
# A typical project contains:
project = {
    "id": 10,
    "name": "Mouse RNA-seq Time Course",
    "description": "Investigating gene expression changes over time",
    # organism id is a string — looks like an NCBI taxonomy id (10090 = mouse);
    # confirm against the organisms endpoint.
    "organism": {"id": "10090", "name": "Mus musculus"},
    "is_public": False,
    # ISO 8601 timestamps (UTC).
    "created": "2024-01-01T10:00:00Z",
    "modified": "2024-01-15T14:30:00Z",
    "owner": {"id": 1, "username": "researcher"},
    "sample_count": 24,
    "execution_count": 8,
    "total_size": 125_000_000_000,  # bytes (125 GB)
    # Free-form key/value metadata attached by the project owner.
    "metadata": {
        "study_type": "time_course",
        "funding": "NIH R01-12345",
        "publication": "doi:10.1234/journal.2024.001",
        "pi": "Dr. Jane Smith",
        "institution": "University of Science"
    }
}
Creating Projects
Basic Project Creation
# Python client
# Create a project with the minimum useful fields; the returned object
# carries the server-assigned ID.
project = client.create_project(
    name="Cancer Genomics Study",
    organism="human",
    description="Comprehensive genomic analysis of tumor samples"
)
print(f"Created project: {project.id}")
REST API Project Creation
# Create a project over the REST API (POST /projects/new); the JSON body
# mirrors the Python-client arguments above.
curl -X POST https://api.flow.bio/projects/new \
-H "Authorization: Bearer <token>" \
-H "Content-Type: application/json" \
-d '{
"name": "Cancer Genomics Study",
"organism": "human",
"description": "Comprehensive genomic analysis of tumor samples",
"is_public": false
}'
GraphQL Project Creation
# Create a project; $input carries name, organism, description, visibility
# and optional metadata (see the variables example below).
mutation CreateProject($input: CreateProjectInput!) {
  createProject(input: $input) {
    # Server-assigned fields returned for the new project.
    id
    name
    organism {
      id
      name
    }
    created
    owner {
      username
    }
  }
}
# Variables
{
"input": {
"name": "Cancer Genomics Study",
"organism": "9606",
"description": "Comprehensive genomic analysis",
"isPublic": false,
"metadata": {
"study_type": "case_control",
"disease": "breast_cancer"
}
}
}
Create with Full Metadata
# Comprehensive project creation
project = client.create_project(
    name="Multi-omics Integration Study",
    organism="human",
    description="Integrating RNA-seq, ChIP-seq, and ATAC-seq data",
    is_public=False,
    # Arbitrary key/value metadata stored with the project; the keys below
    # are illustrative conventions rather than an enforced schema.
    metadata={
        # Study information
        "study_design": "longitudinal",
        "start_date": "2024-01-01",
        "expected_end_date": "2025-12-31",
        "study_phase": "data_collection",
        # Funding and compliance
        "funding_source": "NIH",
        "grant_number": "R01-GM123456",
        "irb_approval": "IRB-2024-001",
        "data_use_agreement": "DUA-2024-001",
        # Team information
        "principal_investigator": "Dr. Jane Smith",
        "co_investigators": ["Dr. John Doe", "Dr. Alice Johnson"],
        "institution": "University of Science",
        "department": "Genomics",
        # Analysis plan
        "primary_endpoints": ["differential_expression", "pathway_enrichment"],
        "statistical_plan": "DESeq2 for DE, GSEA for pathways",
        "target_sample_size": 100,
        # Keywords for discovery
        "keywords": ["cancer", "multi-omics", "integration", "systems_biology"],
        "therapeutic_area": "oncology"
    }
)
Managing Project Samples
Add Samples to Project
# Add existing samples by ID.
project.add_samples([sample1.id, sample2.id, sample3.id])

# Create four new samples that are attached to the project at creation
# time via the project= argument.
new_samples = [
    client.create_sample(
        name=f"{project.name}_Sample_{index}",
        organism=project.organism.id,
        sample_type="RNA",
        project=project.id
    )
    for index in range(1, 5)
]
Remove Samples from Project
# Remove specific samples
project.remove_samples([sample1.id, sample2.id])
# Remove all samples (careful!) — this strips every sample from the
# project; presumably the samples themselves still exist outside the
# project, but confirm before running this on real data.
all_sample_ids = [s.id for s in project.get_samples()]
project.remove_samples(all_sample_ids)
Transfer Samples Between Projects
def transfer_samples(source_project_id, target_project_id, sample_ids):
    """Transfer samples from one project to another.

    Samples are added to the target *before* being removed from the
    source, so a failure partway through cannot leave a sample belonging
    to neither project. (Assumes a sample may briefly belong to both
    projects — confirm if Flow enforces single-project membership.)

    Args:
        source_project_id: ID of the project currently holding the samples.
        target_project_id: ID of the project to move the samples into.
        sample_ids: List of sample IDs to transfer.

    Returns:
        The number of samples transferred.

    Raises:
        ValueError: If the two projects have different organisms.
    """
    source = client.get_project(source_project_id)
    target = client.get_project(target_project_id)
    # Samples are organism-specific, so both projects must match.
    if source.organism.id != target.organism.id:
        raise ValueError("Projects must have the same organism")
    # Add first, then remove — the safe ordering if either call fails.
    target.add_samples(sample_ids)
    source.remove_samples(sample_ids)
    print(f"Transferred {len(sample_ids)} samples")
    return len(sample_ids)

# Transfer samples
transfer_samples(
    source_project_id=10,
    target_project_id=15,
    sample_ids=[123, 124, 125]
)
Retrieving Projects
Get Single Project
# Python client
project = client.get_project(10)
# Access project details
print(f"Name: {project.name}")
print(f"Samples: {project.sample_count}")
print(f"Executions: {project.execution_count}")
print(f"Total size: {project.total_size / 1e9:.1f} GB")  # total_size is in bytes
REST API Project Retrieval
# Fetch a single project by ID over the REST API.
curl -H "Authorization: Bearer <token>" \
https://api.flow.bio/projects/10
GraphQL Project Query
# Fetch one project with its organism, permissions, and the first page of
# its samples and executions.
query GetProjectDetails($id: ID!) {
  project(id: $id) {
    id
    name
    description
    organism {
      id
      name
      scientificName
    }
    isPublic
    created
    modified
    owner {
      username
      email
    }
    # Aggregate counters for the project.
    sampleCount
    executionCount
    totalSize
    metadata
    # What the calling user may do with this project.
    permissions {
      canEdit
      canShare
      canDelete
    }
    samples(limit: 100) {
      edges {
        node {
          id
          name
          sampleType {
            name
          }
          condition
        }
      }
    }
    executions(limit: 20) {
      edges {
        node {
          id
          name
          pipeline {
            name
          }
          status
        }
      }
    }
  }
}
List Projects
# Get your projects
my_projects = client.get_projects()
# Get shared projects
shared_projects = client.get_shared_projects()
# Get public projects (optionally filtered by organism, paged by limit)
public_projects = client.get_public_projects(
    organism="human",
    limit=50
)
# Search projects by free-text query with sample-count constraints
cancer_projects = client.search_projects(
    query="cancer",
    has_samples=True,
    min_samples=10
)
Updating Projects
Update Project Information
# Update basic properties
project.update(
    name="Updated Project Name",
    description="More detailed description",
    is_public=True
)
# Update metadata — presumably merged into the existing metadata rather
# than replacing it; confirm against the API reference.
project.update_metadata({
    "status": "analysis_complete",
    "publication": "Nature 2024",
    "data_availability": "GEO GSE123456"
})
# Add tags
project.add_tags(["rna-seq", "time-course", "published"])
REST API Project Update
# Update a project's description and metadata (POST /projects/<id>/update).
curl -X POST https://api.flow.bio/projects/10/update \
-H "Authorization: Bearer <token>" \
-H "Content-Type: application/json" \
-d '{
"description": "Updated description",
"metadata": {
"status": "completed",
"publication": "Nature 2024"
}
}'
Batch Update Project Samples
# Update metadata for all project samples
def update_project_sample_metadata(project_id, metadata_updates):
    """Merge *metadata_updates* into every sample of the given project.

    Returns the number of samples that were touched.
    """
    proj = client.get_project(project_id)
    members = proj.get_samples()
    for member in members:
        member.merge_metadata(metadata_updates)
        print(f"Updated {member.name}")
    return len(members)

# Add batch information to all samples
updated = update_project_sample_metadata(
    project_id=10,
    metadata_updates={"batch": "2024_Q1", "platform": "NovaSeq"}
)
Project Organization
Project Hierarchies
# Create related projects
parent_project = client.create_project(
    name="Large Cohort Study",
    organism="human",
    metadata={"project_type": "parent"}
)
# Create sub-projects: the parent/child link is recorded purely in
# metadata ("parent_project_id"), one sub-project per collection site.
sub_projects = []
for site in ["Site_A", "Site_B", "Site_C"]:
    sub_project = client.create_project(
        name=f"{parent_project.name} - {site}",
        organism="human",
        metadata={
            "project_type": "sub_project",
            "parent_project_id": parent_project.id,
            "site": site
        }
    )
    sub_projects.append(sub_project)
Project Templates
# Define project templates
PROJECT_TEMPLATES = {
"rna_seq_study": {
"metadata": {
"study_type": "gene_expression",
"assays": ["RNA-seq"],
"analysis_pipeline": "nf-core/rnaseq",
"min_samples": 3,
"replicates": True
},
"required_sample_metadata": [
"condition", "replicate", "batch"
]
},
"multi_omics_study": {
"metadata": {
"study_type": "integrative",
"assays": ["RNA-seq", "ATAC-seq", "ChIP-seq"],
"integration_method": "MOFA",
"min_samples_per_assay": 10
},
"required_sample_metadata": [
"assay_type", "condition", "timepoint"
]
}
}
def create_project_from_template(name, organism, template_name):
    """Create project using predefined template"""
    spec = PROJECT_TEMPLATES[template_name]
    new_project = client.create_project(
        name=name,
        organism=organism,
        metadata=spec["metadata"]
    )
    # Record which sample-level fields the template treats as mandatory.
    new_project.update_metadata(
        {"required_sample_fields": spec["required_sample_metadata"]}
    )
    return new_project
Project Analytics
Project Statistics
def get_project_statistics(project_id):
    """Get comprehensive project statistics"""
    project = client.get_project(project_id)
    samples = project.get_samples()
    executions = project.get_executions()

    def _tally(bucket, key):
        # Increment a counter-dict entry, creating it on first sight.
        bucket[key] = bucket.get(key, 0) + 1

    stats = {
        "basic": {
            "total_samples": len(samples),
            "total_executions": len(executions),
            "total_size_gb": project.total_size / 1e9
        },
        "by_sample_type": {},
        "by_condition": {},
        "execution_status": {
            "completed": 0,
            "running": 0,
            "failed": 0
        },
        "data_types": {}
    }
    # Tally samples by type, condition, and the types of attached data.
    for sample in samples:
        _tally(stats["by_sample_type"], sample.sample_type.name)
        _tally(stats["by_condition"], sample.condition or "unspecified")
        for data in sample.data:
            _tally(stats["data_types"], data.data_type.name)
    # Count executions per known status; unknown statuses are ignored.
    for execution in executions:
        status = execution.status.lower()
        if status in stats["execution_status"]:
            stats["execution_status"][status] += 1
    return stats
# Generate report
stats = get_project_statistics(10)
print("Project Statistics:")  # fixed: was an f-string with no placeholders
print(f"Total Samples: {stats['basic']['total_samples']}")
print(f"Total Size: {stats['basic']['total_size_gb']:.1f} GB")
print(f"Sample Types: {stats['by_sample_type']}")
Project Timeline
def create_project_timeline(project_id):
    """Create timeline of project activities"""
    project = client.get_project(project_id)

    def _event(when, kind, text):
        # Uniform event record used throughout the timeline.
        return {"date": when, "type": kind, "description": text}

    # Project creation is always the first recorded event.
    events = [_event(project.created, "project_created",
                     f"Project '{project.name}' created")]
    # One event per sample addition.
    for sample in project.get_samples():
        events.append(_event(sample.created, "sample_added",
                             f"Sample '{sample.name}' added"))
    # Start (and, when finished, completion) events per execution.
    for execution in project.get_executions():
        events.append(_event(execution.created, "execution_started",
                             f"Pipeline '{execution.pipeline.name}' started"))
        if execution.completed:
            events.append(_event(execution.completed, "execution_completed",
                                 f"Pipeline '{execution.pipeline.name}' completed"))
    # Chronological order.
    events.sort(key=lambda e: e["date"])
    return events
Project Data Management
Export Project Data
def export_project_manifest(project_id, output_file="manifest.tsv"):
    """Export project manifest with all samples and data"""
    project = client.get_project(project_id)
    header = "sample_id\tsample_name\tcondition\tdata_id\tfilename\tsize\tmd5\n"
    with open(output_file, 'w') as handle:
        handle.write(header)
        # One TSV row per data file, repeating the owning sample's fields.
        for sample in project.get_samples():
            for data in sample.data:
                handle.write(
                    f"{sample.id}\t{sample.name}\t{sample.condition}\t"
                    f"{data.id}\t{data.filename}\t{data.size}\t{data.md5}\n"
                )
    print(f"Exported manifest to {output_file}")
def export_project_metadata(project_id, format="excel"):
    """Export all project metadata.

    Writes "<project name>_metadata.xlsx" containing a "Project" sheet
    (one row of project-level fields) and a "Samples" sheet (one row per
    sample, with the sample's free-form metadata keys as extra columns).

    Args:
        project_id: ID of the project to export.
        format: Export format; only "excel" is currently supported.

    Raises:
        ValueError: If *format* is not supported (previously an unknown
            format was silently ignored and nothing was written).
    """
    if format != "excel":
        raise ValueError(f"Unsupported export format: {format!r}")
    project = client.get_project(project_id)
    import pandas as pd
    # Project info
    project_df = pd.DataFrame([{
        "Project ID": project.id,
        "Name": project.name,
        "Description": project.description,
        "Organism": project.organism.name,
        "Created": project.created,
        "Owner": project.owner.username
    }])
    # Sample info: fixed columns first, then each sample's metadata keys.
    sample_data = []
    for sample in project.get_samples():
        row = {
            "Sample ID": sample.id,
            "Sample Name": sample.name,
            "Type": sample.sample_type.name,
            "Condition": sample.condition
        }
        row.update(sample.metadata)
        sample_data.append(row)
    samples_df = pd.DataFrame(sample_data)
    # Write both sheets into a single workbook.
    with pd.ExcelWriter(f"{project.name}_metadata.xlsx") as writer:
        project_df.to_excel(writer, sheet_name="Project", index=False)
        samples_df.to_excel(writer, sheet_name="Samples", index=False)
Archive Project
def archive_project(project_id, archive_path="/archive"):
    """Archive project data and metadata to a local directory tree.

    Layout: <archive_path>/project_<id>/project.json plus one
    sample_<id>/ directory per sample containing metadata.json and its
    downloaded data files.

    Args:
        project_id: ID of the project to archive.
        archive_path: Root directory for archives (default "/archive").
    """
    import os
    import json
    # Fix: datetime was used below but never imported (NameError at
    # runtime); the unused shutil import has been dropped.
    from datetime import datetime
    project = client.get_project(project_id)
    # Create archive directory
    project_dir = os.path.join(archive_path, f"project_{project.id}")
    os.makedirs(project_dir, exist_ok=True)
    # Save project metadata
    with open(os.path.join(project_dir, "project.json"), 'w') as f:
        json.dump({
            "id": project.id,
            "name": project.name,
            "description": project.description,
            "organism": project.organism.name,
            "metadata": project.metadata,
            "created": str(project.created),
            "archived": str(datetime.now())
        }, f, indent=2)
    # Create sample directories and download data
    for sample in project.get_samples():
        sample_dir = os.path.join(project_dir, f"sample_{sample.id}")
        os.makedirs(sample_dir, exist_ok=True)
        # Save sample metadata
        with open(os.path.join(sample_dir, "metadata.json"), 'w') as f:
            json.dump({
                "id": sample.id,
                "name": sample.name,
                "metadata": sample.metadata
            }, f, indent=2)
        # Download data files
        for data in sample.data:
            output_path = os.path.join(sample_dir, data.filename)
            client.download_file(data.download_url, output_path)
            print(f"Downloaded {data.filename}")
    print(f"Project archived to {project_dir}")
Project Sharing
Share Projects
# Share with users (read-only access)
project.share(
    users=["collaborator1@example.com", "collaborator2@example.com"],
    permission="read"
)
# Share with groups
project.share(
    groups=["bioinformatics-team"],
    permission="edit"
)
# Make project public
project.update(is_public=True)
# Share with specific permissions — per-user and per-group permission
# levels in a single call.
project.share_advanced({
    "users": {
        "analyst@example.com": "read",
        "scientist@example.com": "edit",
        "pi@example.com": "share"
    },
    "groups": {
        "wet-lab": "read",
        "dry-lab": "edit"
    }
})
Project Access Control
def audit_project_access(project_id):
    """Audit who has access to a project.

    Returns a dict with the owner, public flag, the per-user and
    per-group shares, and a rough total of users with access.
    """
    project = client.get_project(project_id)
    access_report = {
        "owner": project.owner.username,
        "is_public": project.is_public,
        "shared_users": [],
        "shared_groups": [],
        "total_users": 0
    }
    # Get directly shared users
    for user in project.shared_with_users:
        access_report["shared_users"].append({
            "username": user.username,
            "permission": user.permission_level
        })
    # Fix: directly shared users were previously not counted in total_users.
    access_report["total_users"] += len(access_report["shared_users"])
    # Get shared groups
    for group in project.shared_with_groups:
        access_report["shared_groups"].append({
            "name": group.name,
            "permission": group.permission_level,
            "member_count": group.member_count
        })
        # NOTE(review): a user who is both directly shared and a group
        # member is counted twice, so total_users is an upper bound.
        access_report["total_users"] += group.member_count
    return access_report
Project Workflows
Standard Project Workflow
def setup_standard_project(
    name,
    organism,
    sample_annotations_file,
    pipeline="RNA-seq"
):
    """Set up a standard analysis project"""
    # 1. Create the project shell.
    project = client.create_project(
        name=name,
        organism=organism,
        metadata={"pipeline": pipeline}
    )
    print(f"Created project: {project.name}")
    # 2. Import samples from the annotation spreadsheet.
    samples = import_samples_from_excel(
        sample_annotations_file,
        project_id=project.id
    )
    print(f"Imported {len(samples)} samples")
    # 3. Validate every imported sample, collecting any problems.
    issues = []
    for imported in samples:
        report = validate_sample(imported)
        if not report["valid"]:
            issues.append({
                "sample": imported.name,
                "issues": report["issues"]
            })
    if issues:
        print(f"Warning: {len(issues)} samples have validation issues")
    # 4. Record the distinct conditions and overall readiness.
    conditions = {s.condition for s in samples if s.condition}
    project.update_metadata({
        "conditions": list(conditions),
        "ready_for_analysis": len(issues) == 0
    })
    return project, samples, issues
Multi-Site Project Coordination
class MultiSiteProject:
    """Coordinate projects across multiple sites"""

    def __init__(self, master_project_name, sites):
        self.sites = sites
        # The umbrella project that ties the per-site projects together.
        self.master_project = client.create_project(
            name=master_project_name,
            organism="human",
            metadata={"project_type": "multi_site", "sites": sites}
        )
        self.site_projects = {}

    def create_site_project(self, site_name):
        """Create project for a specific site"""
        site_project = client.create_project(
            name=f"{self.master_project.name} - {site_name}",
            organism=self.master_project.organism.id,
            # The link back to the master project lives in metadata.
            metadata={
                "master_project_id": self.master_project.id,
                "site": site_name
            }
        )
        self.site_projects[site_name] = site_project
        return site_project

    def aggregate_statistics(self):
        """Aggregate statistics across all sites"""
        totals = {
            "total_samples": 0,
            "total_executions": 0,
            "by_site": {}
        }
        for site_name, site_project in self.site_projects.items():
            per_site = get_project_statistics(site_project.id)
            totals["total_samples"] += per_site["basic"]["total_samples"]
            totals["total_executions"] += per_site["basic"]["total_executions"]
            totals["by_site"][site_name] = per_site
        return totals
Project Lifecycle
Project Phases
# Ordered lifecycle phases a project moves through, from initial planning
# to archival; update_project_phase() validates against this list.
PROJECT_PHASES = [
    "planning",
    "sample_collection",
    "data_generation",
    "quality_control",
    "analysis",
    "interpretation",
    "publication",
    "archived"
]
def update_project_phase(project_id, new_phase):
    """Update project phase and perform phase-specific actions"""
    # Reject anything outside the known lifecycle.
    if new_phase not in PROJECT_PHASES:
        raise ValueError(f"Invalid phase: {new_phase}")
    project = client.get_project(project_id)
    old_phase = project.metadata.get("phase", "planning")
    # Append this transition to the running history and stamp the change.
    history = project.metadata.get("phase_history", [])
    project.update_metadata({
        "phase": new_phase,
        "phase_updated": str(datetime.now()),
        "phase_history": history + [{
            "from": old_phase,
            "to": new_phase,
            "date": str(datetime.now())
        }]
    })
    # Side effects triggered by entering certain phases.
    if new_phase == "quality_control":
        run_project_qc(project_id)
    elif new_phase == "publication":
        prepare_publication_package(project_id)
    elif new_phase == "archived":
        archive_project(project_id)
    print(f"Project phase updated: {old_phase} → {new_phase}")
Project Completion Checklist
def check_project_completion(project_id):
    """Check if project meets completion criteria"""
    project = client.get_project(project_id)
    samples = project.get_samples()
    executions = project.get_executions()
    # Each entry is True when the corresponding criterion is satisfied.
    checklist = {
        "has_samples": len(samples) > 0,
        "all_samples_have_data": all(len(s.data) > 0 for s in samples),
        "has_executions": len(executions) > 0,
        "all_executions_complete": all(
            e.status == "completed" for e in executions
        ),
        "has_metadata": bool(project.metadata),
        "has_description": bool(project.description),
        "qc_complete": project.metadata.get("qc_status") == "passed"
    }
    failed = [name for name, ok in checklist.items() if not ok]
    score = (len(checklist) - len(failed)) / len(checklist)
    return {
        "complete": not failed,
        "score": score,
        "checklist": checklist,
        "missing": failed
    }
Best Practices
1. Project Naming Conventions
def generate_project_name(
    investigator_initials,
    year,
    study_type,
    organism,
    sequence_number
):
    """Generate standardized project names.

    Format: INI_YEAR_TYPE_ORG_NUM, e.g. JS_2024_RNAseq_Hs_001.
    Organisms not in the lookup fall back to their first two letters,
    upper-cased; the sequence number is zero-padded to three digits.
    """
    known_codes = {
        "human": "Hs",
        "mouse": "Mm",
        "rat": "Rn",
        "zebrafish": "Dr"
    }
    code = known_codes.get(organism, organism[:2].upper())
    parts = (
        investigator_initials,
        str(year),
        study_type,
        code,
        f"{sequence_number:03d}"
    )
    return "_".join(parts)
2. Project Documentation
def create_project_readme(project_id):
    """Generate README for project.

    Builds a Markdown document from the project record and the statistics
    returned by get_project_statistics(), and returns it as a string.
    """
    project = client.get_project(project_id)
    stats = get_project_statistics(project_id)
    # NOTE: the template body below is flush-left on purpose — its text is
    # emitted verbatim into the README.
    readme = f"""# {project.name}
## Description
{project.description}
## Project Information
- **ID**: {project.id}
- **Organism**: {project.organism.name}
- **Created**: {project.created}
- **Owner**: {project.owner.username}
- **Status**: {project.metadata.get('phase', 'active')}
## Statistics
- **Total Samples**: {stats['basic']['total_samples']}
- **Total Executions**: {stats['basic']['total_executions']}
- **Total Size**: {stats['basic']['total_size_gb']:.1f} GB
## Sample Types
{chr(10).join(f"- {k}: {v}" for k, v in stats['by_sample_type'].items())}
## Metadata
{chr(10).join(f"- **{k}**: {v}" for k, v in project.metadata.items())}
## Access
- Public: {'Yes' if project.is_public else 'No'}
- Shared with {len(project.shared_with_users)} users and {len(project.shared_with_groups)} groups
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
"""
    return readme
3. Project Validation
def validate_project_structure(project_id):
    """Validate project follows best practices"""
    project = client.get_project(project_id)
    samples = project.get_samples()
    results = {
        "errors": [],
        "warnings": [],
        "info": []
    }
    # Recommended project-level metadata fields.
    for required in ("study_type", "pi", "institution"):
        if required not in project.metadata:
            results["warnings"].append(
                f"Missing recommended metadata field: {required}"
            )
    if samples:
        # All samples in one project should share a single organism.
        distinct_organisms = {s.organism.id for s in samples}
        if len(distinct_organisms) > 1:
            results["errors"].append(
                "Multiple organisms in project"
            )
    # Every replicate group needs at least two members to be useful.
    groups = {s.replicate_group for s in samples if s.replicate_group}
    for group in groups:
        members = [s for s in samples if s.replicate_group == group]
        if len(members) < 2:
            results["warnings"].append(
                f"Replicate group '{group}' has only 1 sample"
            )
    # Flag projects whose samples carry no data files at all.
    attached = [d for s in samples for d in s.data]
    if samples and not attached:
        results["warnings"].append(
            "Project has samples but no data files"
        )
    return results
Next Steps
- Sample Management - Managing samples within projects
- Pipeline Execution - Running analyses on projects
- Search API - Finding and filtering projects
- Permissions Guide - Project sharing and access