Flow Logo

API Guides

Search API

Flow provides powerful search capabilities to find samples, projects, executions, and data across the platform. This guide covers all search methods, from quick searches to advanced filtering.


**Quick Search.** The global search endpoint is the fastest way to search across all resource types at once.

curl -H "Authorization: Bearer <token>" \
  "https://api.flow.bio/search?q=RNA-seq"

Response:

{
  "results": {
    "samples": [
      {
        "id": 123,
        "name": "RNA-seq Sample 1",
        "type": "sample",
        "url": "/samples/123/",
        "highlight": "sample_123_<em>RNA-seq</em>_treated"
      }
    ],
    "projects": [
      {
        "id": 10,
        "name": "Mouse RNA-seq Study",
        "type": "project",
        "url": "/projects/10/"
      }
    ],
    "executions": [],
    "data": []
  },
  "total_count": 2,
  "query": "RNA-seq",
  "search_time_ms": 45
}
# Search across all resources
results = client.search("BRCA1")

print(f"Found {results['total_count']} results:")
print(f"- {len(results['samples'])} samples")
print(f"- {len(results['projects'])} projects")
print(f"- {len(results['executions'])} executions")
print(f"- {len(results['data'])} data files")

# Access specific results. Each hit is a dict shaped like the REST
# response above, so use key access -- the original snippet mixed
# dict subscripting with attribute access on the same objects.
for sample in results['samples']:
    print(f"Sample: {sample['name']} (ID: {sample['id']})")
# Global quick search: one request returns hits grouped by resource type.
query QuickSearch($query: String!) {
  search(query: $query) {
    # Total matches across all four resource types
    totalCount
    samples {
      id
      name
      organism {
        name
      }
    }
    projects {
      id
      name
      sampleCount
    }
    executions {
      id
      name
      status
    }
    data {
      id
      filename
      size
    }
  }
}

Advanced Sample Filtering

# Combine a free-text query with structured filters in a single call.
filter_kwargs = {
    "query": "cancer",                # full-text search term
    "organism": "human",
    "sample_type": "RNA",
    "project": project_id,
    "created_after": "2024-01-01",
    "created_before": "2024-12-31",
    "has_data": True,
    "min_data_files": 2,
    "metadata_filters": {
        "treatment": "drug_A",
        "concentration": "10uM",
    },
}
samples = client.search_samples(**filter_kwargs)

for sample in samples:
    print(f"{sample.name}: {sample.organism} - {sample.created}")
curl -H "Authorization: Bearer <token>" \
  "https://api.flow.bio/samples/search?filter=tumor&organism=human&sample_type=RNA&limit=50"
# Advanced sample search: free-text query plus structured filters,
# sorting, and pagination. The response includes per-hit match
# highlights and facet counts for building filter UIs.
query SearchSamples(
  $query: String
  $filters: SampleFilterInput
  $sort: SampleSortInput
  $pagination: PaginationInput
) {
  searchSamples(
    query: $query
    filters: $filters
    sort: $sort
    pagination: $pagination
  ) {
    totalCount
    edges {
      node {
        id
        name
        organism {
          name
        }
        sampleType {
          name
        }
        metadata
        created
      }
      # Which field matched, and the matching text fragments
      highlight {
        field
        fragments
      }
    }
    # Aggregated counts for refining the search
    facets {
      organism {
        value
        count
      }
      sampleType {
        value
        count
      }
    }
  }
}

Sample Search Filters

| Filter | Type | Description |
|---|---|---|
| `query` | string | Full-text search in name, description, metadata |
| `organism` | ID/string | Filter by organism |
| `sample_type` | ID/string | Filter by sample type |
| `project` | ID | Filter by project |
| `owner` | ID/string | Filter by owner username |
| `created_after` | datetime | Samples created after date |
| `created_before` | datetime | Samples created before date |
| `modified_after` | datetime | Samples modified after date |
| `has_data` | boolean | Only samples with data files |
| `has_executions` | boolean | Only samples used in pipelines |
| `metadata` | object | Filter by metadata fields |

Search Public Projects

# Find public mouse RNA-seq projects that contain at least ten samples.
search_options = dict(
    query="RNA-seq mouse",
    is_public=True,
    organism="mouse",
    has_samples=True,
    min_samples=10,
)
public_projects = client.search_projects(**search_options)

for project in public_projects:
    print(f"{project.name}: {project.sample_count} samples")

Project Search API

# REST API
curl -H "Authorization: Bearer <token>" \
  "https://api.flow.bio/projects/search?filter=cancer&organism=human&has_samples=true"
# Project search filtered by visibility, organism, and whether the
# project has pipeline executions.
query SearchProjects(
  $query: String
  $isPublic: Boolean
  $organism: ID
  $hasExecutions: Boolean
) {
  searchProjects(
    query: $query
    isPublic: $isPublic
    organism: $organism
    hasExecutions: $hasExecutions
  ) {
    edges {
      node {
        id
        name
        description
        organism {
          name
        }
        sampleCount
        executionCount
        isPublic
        owner {
          username
        }
      }
    }
  }
}

Search by Filename Pattern

# Wildcard match on filename (forward reads only)
fastq_files = client.search_data(
    data_type="FASTQ",
    pattern="*_R1.fastq.gz",
)

# Restrict results to a byte-size window
large_files = client.search_data(
    data_type="BAM",
    min_size=1_000_000_000,   # larger than 1 GB
    max_size=10_000_000_000,  # smaller than 10 GB
)

# Match on metadata of the sample that owns each file
treated_data = client.search_data(
    sample_metadata={"treatment": "drug_A", "timepoint": "24h"}
)
curl -H "Authorization: Bearer <token>" \
  "https://api.flow.bio/data/search?pattern=*.bam&size_min=1000000000"
# Data-file search by filename pattern (required), with optional type,
# size range, and owning-sample filters. totalSize and fileCount
# summarize the whole result set, not just the current page.
query SearchDataFiles(
  $pattern: String!
  $dataType: ID
  $sizeRange: SizeRangeInput
  $sample: ID
) {
  searchData(
    pattern: $pattern
    dataType: $dataType
    sizeRange: $sizeRange
    sample: $sample
  ) {
    edges {
      node {
        id
        filename
        size
        dataType {
          name
        }
        sample {
          name
        }
        created
        md5
      }
    }
    totalSize
    fileCount
  }
}

Search Pipeline Runs

# Completed RNA-seq runs from 2024 whose runtime fell between 1 and 8 hours
executions = client.search_executions(
    pipeline="RNA-seq",
    status="completed",
    created_after="2024-01-01",
    min_duration=3600,   # at least one hour
    max_duration=28800,  # at most eight hours
)

# Runs launched with specific pipeline parameters
de_analysis = client.search_executions(
    pipeline="RNA-seq",
    parameters={"aligner": "star", "perform_de": True},
)

Execution Search Filters

# Execution search by pipeline, status, owner, and date range.
# The stats block aggregates all matches by status.
query SearchExecutions(
  $pipeline: ID
  $status: ExecutionStatus
  $owner: ID
  $dateRange: DateRangeInput
) {
  searchExecutions(
    pipeline: $pipeline
    status: $status
    owner: $owner
    dateRange: $dateRange
  ) {
    edges {
      node {
        id
        name
        status
        progress
        pipeline {
          name
        }
        duration
        created
        parameters
      }
    }
    stats {
      totalCount
      runningCount
      completedCount
      failedCount
      averageDuration
    }
  }
}

Search Syntax

Flow supports advanced search syntax:

| Syntax | Example | Description |
|---|---|---|
| Exact phrase | `"RNA sequencing"` | Match exact phrase |
| OR operator | `RNA OR DNA` | Match either term |
| AND operator | `mouse AND liver` | Match both terms |
| NOT operator | `cancer NOT lung` | Exclude term |
| Wildcard | `BRCA*` | Match BRCA1, BRCA2, etc. |
| Field search | `organism:human` | Search specific field |
| Range | `size:[1GB TO 10GB]` | Numeric ranges |
# Field-scoped search: exact (quoted) name match combined with an organism term
results = client.search_samples(
    query='name:"Sample_001" AND organism:human'
)

# Metadata fields support the same syntax, including numeric ranges
results = client.search_samples(
    query='metadata.treatment:"drug_A" AND metadata.dose:[5 TO 15]'
)

# Boolean grouping, wildcards, and exclusion can be combined
results = client.search(
    query='(organism:human OR organism:mouse) AND type:RNA* NOT project:test*'
)

Search with Facets

Get aggregated counts for filtering:

REST API with Facets

curl -H "Authorization: Bearer <token>" \
  "https://api.flow.bio/search?q=cancer&facets=organism,sample_type,data_type"

Response with facets:

{
  "results": { ... },
  "facets": {
    "organism": [
      {"value": "human", "count": 145},
      {"value": "mouse", "count": 89},
      {"value": "rat", "count": 12}
    ],
    "sample_type": [
      {"value": "RNA", "count": 120},
      {"value": "DNA", "count": 95},
      {"value": "Protein", "count": 31}
    ],
    "data_type": [
      {"value": "FASTQ", "count": 180},
      {"value": "BAM", "count": 66}
    ]
  }
}
# Search that returns both the matching samples and facet aggregations
# (by organism, sample type, and year) for building filter UIs.
query FacetedSearch($query: String!) {
  searchWithFacets(query: $query) {
    results {
      samples {
        id
        name
      }
    }
    facets {
      organisms {
        id
        name
        count
      }
      sampleTypes {
        id
        name
        count
      }
      years {
        year
        count
      }
    }
  }
}

Saved Searches

Save frequently used searches:

# Persist a complex query so it can be re-run by id later
search_spec = {
    "name": "Active RNA-seq Samples",
    "resource_type": "sample",
    "query": "type:RNA AND has_data:true",
    "filters": {"organism": "human", "created_after": "2024-01-01"},
    "is_public": False,
}
saved_search = client.create_saved_search(**search_spec)

# Re-run the stored search
results = client.run_saved_search(saved_search.id)

Manage Saved Searches

# List the saved searches owned by the current user
my_searches = client.get_saved_searches()

# Grant a group read-only access to a saved search
saved_search.share(groups=["lab-members"], permission="read")

# Update the stored filters in place
saved_search.update(
    filters={"organism": "mouse"}
)

Search Performance

Pagination Best Practices

# Bad - loading all results at once
all_samples = client.search_samples(query="RNA", limit=10000)

# Good - paginated iteration
def iterate_search_results(query, page_size=100):
    """Yield samples matching *query*, fetched one page at a time.

    Requests ``page_size`` results per API call and stops as soon as
    an empty page comes back, so only one page is held in memory.
    """
    start = 0
    page = client.search_samples(query=query, limit=page_size, offset=start)
    while page:
        yield from page
        start += page_size
        page = client.search_samples(query=query, limit=page_size, offset=start)

# Process results efficiently
for sample in iterate_search_results("RNA"):
    process_sample(sample)

Optimize Search Queries

# Prefer structured filters over free-text search when possible --
# the filter table above lists every field that can be filtered directly.
# Slow - free-text search
samples = client.search_samples(query="human RNA-seq")

# Fast - explicit filters on specific fields
samples = client.search_samples(
    organism="human",
    sample_type="RNA"
)

# Use projections to reduce data transfer:
# in GraphQL, request only the fields you will actually read
query = """
  query QuickSearch($query: String!) {
    searchSamples(query: $query) {
      edges {
        node {
          id
          name
          # Don't request unnecessary fields
        }
      }
    }
  }
"""

Search Caching

from functools import lru_cache
from datetime import datetime, timedelta

class CachedSearchClient:
    """Wrap a Flow client and memoize ``search_samples`` calls.

    Results are cached per unique set of keyword arguments and served
    from memory for up to five minutes. Expired entries are evicted
    when they are next looked up, so repeated queries do not make the
    cache grow without bound (the original kept stale entries forever).
    """

    def __init__(self, client):
        self.client = client
        # How long a cached result stays valid
        self._cache_duration = timedelta(minutes=5)
        # cache_key -> (results, time cached)
        self._cache = {}

    def search_samples(self, **kwargs):
        """Return search results, reusing a cached response when fresh."""
        # kwargs keys are always strings, so sorting the items gives a
        # stable, argument-order-independent cache key.
        cache_key = str(sorted(kwargs.items()))

        cached = self._cache.get(cache_key)
        if cached is not None:
            cached_result, cached_time = cached
            if datetime.now() - cached_time < self._cache_duration:
                return cached_result
            # Stale entry: drop it so dead results don't accumulate.
            del self._cache[cache_key]

        # Cache miss (or expired): perform the real search and store it.
        results = self.client.search_samples(**kwargs)
        self._cache[cache_key] = (results, datetime.now())
        return results

Advanced Search Patterns

def comprehensive_search(query, filters=None):
    """Search samples and projects with one query and rank the hits.

    Parameters
    ----------
    query : str
        Free-text search string applied to each resource type.
    filters : dict, optional
        May contain a ``types`` list restricting which resource types
        are searched, plus per-type keyword filters under
        ``sample_filters`` and ``project_filters``.

    Returns
    -------
    list of dict
        Entries of the form ``{'type', 'item', 'score'}`` sorted by
        descending relevance score.
    """
    # Guard: the original crashed with AttributeError when called
    # without filters, because None has no .get().
    if filters is None:
        filters = {}

    results = {
        'samples': [],
        'projects': [],
        'executions': [],
        'data': []
    }

    # Search each requested resource type; with no 'types' entry each
    # type defaults to being searched, matching the original behavior.
    if 'sample' in filters.get('types', ['sample']):
        results['samples'] = client.search_samples(
            query=query,
            **filters.get('sample_filters', {})
        )

    if 'project' in filters.get('types', ['project']):
        results['projects'] = client.search_projects(
            query=query,
            **filters.get('project_filters', {})
        )

    # Flatten into a single list with a relevance score per hit.
    all_results = []
    for resource_type, items in results.items():
        for item in items:
            all_results.append({
                'type': resource_type,
                'item': item,
                'score': calculate_relevance_score(item, query)
            })

    # Highest-scoring hits first
    all_results.sort(key=lambda x: x['score'], reverse=True)

    return all_results
def search_project_hierarchy(project_query, sample_query=None):
    """Find projects matching *project_query*; when *sample_query* is
    given, also search each matching project's samples.

    Returns a list of ``{'project', 'matching_samples'}`` dicts.
    """
    matches = []
    for project in client.search_projects(query=project_query):
        entry = {'project': project, 'matching_samples': []}

        # Scope the sample search to this project's id
        if sample_query:
            entry['matching_samples'] = client.search_samples(
                query=sample_query,
                project=project.id,
            )

        matches.append(entry)

    return matches
def search_by_metadata(metadata_query):
    """Search samples using complex metadata queries.

    Parameters
    ----------
    metadata_query : dict
        Maps metadata field names to either a plain value (exact
        match), a range dict (``{'min': ..., 'max': ...}``), or an
        array-containment dict (``{'contains': ...}``).

    Returns
    -------
    The result of ``client.search_samples`` with the built filters.
    """
    filters = {}

    for key, value in metadata_query.items():
        field = f'metadata.{key}'
        if isinstance(value, dict):
            if 'min' in value or 'max' in value:
                # Range query: only emit the bounds actually supplied.
                # The original sent '$gte': None / '$lte': None for a
                # missing bound, passing a bogus bound to the server.
                bounds = {}
                if 'min' in value:
                    bounds['$gte'] = value['min']
                if 'max' in value:
                    bounds['$lte'] = value['max']
                filters[field] = bounds
            elif 'contains' in value:
                # Array-containment test
                filters[field] = {'$contains': value['contains']}
            # NOTE(review): dicts with none of min/max/contains are
            # silently ignored, matching the original behavior.
        else:
            # Exact match
            filters[field] = value

    return client.search_samples(metadata_filters=filters)

# Example usage
results = search_by_metadata({
    'age': {'min': 18, 'max': 65},
    'treatment': 'drug_A',
    'genes': {'contains': 'BRCA1'}
})

Search Export

Export Search Results

def export_search_results(query, format='csv'):
    """Export sample search results for *query* to a file.

    Parameters
    ----------
    query : str
        Search string passed to ``client.search_samples``.
    format : str
        ``'csv'`` writes search_results.csv; ``'json'`` writes
        search_results.json.

    Raises
    ------
    ValueError
        If *format* is not supported (the original silently did
        nothing for an unknown format).
    """
    results = client.search_samples(query=query, limit=None)

    if format == 'csv':
        import csv
        with open('search_results.csv', 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=[
                'id', 'name', 'organism', 'type', 'project', 'created'
            ])
            writer.writeheader()

            for sample in results:
                writer.writerow({
                    'id': sample.id,
                    'name': sample.name,
                    'organism': sample.organism.name,
                    'type': sample.sample_type.name,
                    # Samples without a project get an empty cell
                    'project': sample.project.name if sample.project else '',
                    'created': sample.created
                })

    elif format == 'json':
        import json
        with open('search_results.json', 'w') as f:
            json.dump([
                {
                    'id': s.id,
                    'name': s.name,
                    'organism': s.organism.name,
                    'metadata': s.metadata
                }
                for s in results
            ], f, indent=2)

    else:
        raise ValueError(f"Unsupported export format: {format!r}")

Search Security

All searches automatically filter by user permissions:

# Results are limited to samples the authenticated user may view
my_samples = client.search_samples(query="RNA-seq")

# Public search (no auth required)
public_projects = client.search_public_projects(query="cancer")

# Restrict results to samples shared with specific groups
group_samples = client.search_samples(
    query="experiment",
    shared_with_groups=["research-lab"]
)

Audit Search Access

# Log sensitive searches
def audited_search(query, user, search_type='samples'):
    """Run a search on behalf of *user*, writing audit-log entries for
    the attempt, the result count, and any failure (which is re-raised).
    """
    # Record who searched what before touching the API.
    audit_log.info(f"User {user} searched {search_type}: {query}")

    try:
        hits = client.search(query, resource_type=search_type)
        audit_log.info(f"Search returned {len(hits)} results")
    except Exception as e:
        audit_log.error(f"Search failed: {e}")
        raise

    return hits

Search Tips

1. Use Specific Filters

# Slower - broad text search
results = client.search("human liver RNA-seq 2024")

# Faster - specific filters
results = client.search_samples(
    organism="human",
    metadata_filters={"tissue": "liver"},
    sample_type="RNA",
    created_after="2024-01-01"
)

2. Leverage Search Suggestions

# Get search suggestions
suggestions = client.get_search_suggestions(
    query="BRC",  # Partial query
    resource_type="sample"
)
# Returns: ["BRCA1", "BRCA2", "BRCC3", ...]

3. Use Search Templates

# Define reusable search templates as keyword-argument presets
SEARCH_TEMPLATES = {
    "recent_rna_seq": {
        "sample_type": "RNA",
        # Rolling 30-day window, evaluated when this dict is built
        "created_after": datetime.now() - timedelta(days=30),
        "has_data": True
    },
    "public_human": {
        "organism": "human",
        "is_public": True,
        "has_samples": True
    }
}

# Apply a template by unpacking it into the search call
results = client.search_samples(
    query="cancer",
    **SEARCH_TEMPLATES["recent_rna_seq"]
)

Next Steps

Previous
Downloading Data