Search API
Flow provides powerful search capabilities to find samples, projects, executions, and data across the platform. This guide covers all search methods, from quick searches to advanced filtering.
Quick Search
The fastest way to search across all resource types.
REST API Quick Search
curl -H "Authorization: Bearer <token>" \
"https://api.flow.bio/search?q=RNA-seq"
Response:
{
  "results": {
    "samples": [
      {
        "id": 123,
        "name": "RNA-seq Sample 1",
        "type": "sample",
        "url": "/samples/123/",
        "highlight": "sample_123_<em>RNA-seq</em>_treated"
      }
    ],
    "projects": [
      {
        "id": 10,
        "name": "Mouse RNA-seq Study",
        "type": "project",
        "url": "/projects/10/"
      }
    ],
    "executions": [],
    "data": []
  },
  "total_count": 2,
  "query": "RNA-seq",
  "search_time_ms": 45
}
Python Client Quick Search
# Search across all resources
results = client.search("BRCA1")
print(f"Found {results['total_count']} results:")
print(f"- {len(results['samples'])} samples")
print(f"- {len(results['projects'])} projects")
print(f"- {len(results['executions'])} executions")
print(f"- {len(results['data'])} data files")
# Access specific results
for sample in results['samples']:
    print(f"Sample: {sample.name} (ID: {sample.id})")
GraphQL Quick Search
query QuickSearch($query: String!) {
  search(query: $query) {
    totalCount
    samples {
      id
      name
      organism {
        name
      }
    }
    projects {
      id
      name
      sampleCount
    }
    executions {
      id
      name
      status
    }
    data {
      id
      filename
      size
    }
  }
}
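To call the GraphQL endpoint directly instead of going through the Python client, post the query and its variables as JSON. A minimal sketch using requests is shown below; it assumes the GraphQL endpoint is served at https://api.flow.bio/graphql, so adjust the path for your deployment.

import requests

QUICK_SEARCH = """
query QuickSearch($query: String!) {
  search(query: $query) {
    totalCount
    samples { id name }
    projects { id name }
  }
}
"""

response = requests.post(
    "https://api.flow.bio/graphql",  # assumed endpoint path
    json={"query": QUICK_SEARCH, "variables": {"query": "RNA-seq"}},
    headers={"Authorization": "Bearer <token>"},
)
response.raise_for_status()
print(response.json()["data"]["search"]["totalCount"])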
Sample Search
Advanced Sample Filtering
# Search with multiple filters
samples = client.search_samples(
    query="cancer",               # Text search
    organism="human",
    sample_type="RNA",
    project=project_id,
    created_after="2024-01-01",
    created_before="2024-12-31",
    has_data=True,
    min_data_files=2,
    metadata_filters={
        "treatment": "drug_A",
        "concentration": "10uM"
    }
)

for sample in samples:
    print(f"{sample.name}: {sample.organism} - {sample.created}")
REST API Sample Search
curl -H "Authorization: Bearer <token>" \
"https://api.flow.bio/samples/search?filter=tumor&organism=human&sample_type=RNA&limit=50"
GraphQL Sample Search
query SearchSamples(
$query: String
$filters: SampleFilterInput
$sort: SampleSortInput
$pagination: PaginationInput
) {
searchSamples(
query: $query
filters: $filters
sort: $sort
pagination: $pagination
) {
totalCount
edges {
node {
id
name
organism {
name
}
sampleType {
name
}
metadata
created
}
highlight {
field
fragments
}
}
facets {
organism {
value
count
}
sampleType {
value
count
}
}
}
}
Sample Search Filters
Filter | Type | Description |
---|---|---|
query | string | Full-text search in name, description, metadata |
organism | ID/string | Filter by organism |
sample_type | ID/string | Filter by sample type |
project | ID | Filter by project |
owner | ID/string | Filter by owner username |
created_after | datetime | Samples created after date |
created_before | datetime | Samples created before date |
modified_after | datetime | Samples modified after date |
has_data | boolean | Only samples with data files |
has_executions | boolean | Only samples used in pipelines |
metadata | object | Filter by metadata fields |
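Most of these filters map directly onto keyword arguments of the Python client's search_samples method. A sketch combining a few filters not shown above; the exact parameter names are an assumption, so check your client version:

# Assumes the client exposes these filters as keyword arguments named as in the table
recent_pipeline_samples = client.search_samples(
    owner="jsmith",                # filter by owner username
    modified_after="2024-06-01",   # only recently modified samples
    has_executions=True            # only samples already used in pipelines
)
for sample in recent_pipeline_samples:
    print(sample.name)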
Project Search
Search Public Projects
# Search public projects
public_projects = client.search_projects(
    query="RNA-seq mouse",
    is_public=True,
    organism="mouse",
    has_samples=True,
    min_samples=10
)

for project in public_projects:
    print(f"{project.name}: {project.sample_count} samples")
Project Search API
# REST API
curl -H "Authorization: Bearer <token>" \
"https://api.flow.bio/projects/search?filter=cancer&organism=human&has_samples=true"
GraphQL Project Search
query SearchProjects(
  $query: String
  $isPublic: Boolean
  $organism: ID
  $hasExecutions: Boolean
) {
  searchProjects(
    query: $query
    isPublic: $isPublic
    organism: $organism
    hasExecutions: $hasExecutions
  ) {
    edges {
      node {
        id
        name
        description
        organism {
          name
        }
        sampleCount
        executionCount
        isPublic
        owner {
          username
        }
      }
    }
  }
}
Data File Search
Search by Filename Pattern
# Search using wildcards
fastq_files = client.search_data(
    pattern="*_R1.fastq.gz",
    data_type="FASTQ"
)

# Search by size
large_files = client.search_data(
    min_size=1_000_000_000,    # Files > 1GB
    max_size=10_000_000_000,   # Files < 10GB
    data_type="BAM"
)

# Search by sample metadata
treated_data = client.search_data(
    sample_metadata={
        "treatment": "drug_A",
        "timepoint": "24h"
    }
)
REST API Data Search
curl -H "Authorization: Bearer <token>" \
"https://api.flow.bio/data/search?pattern=*.bam&size_min=1000000000"
GraphQL Data Search
query SearchDataFiles(
  $pattern: String!
  $dataType: ID
  $sizeRange: SizeRangeInput
  $sample: ID
) {
  searchData(
    pattern: $pattern
    dataType: $dataType
    sizeRange: $sizeRange
    sample: $sample
  ) {
    edges {
      node {
        id
        filename
        size
        dataType {
          name
        }
        sample {
          name
        }
        created
        md5
      }
    }
    totalSize
    fileCount
  }
}
Execution Search
Search Pipeline Runs
# Find completed RNA-seq runs
executions = client.search_executions(
    pipeline="RNA-seq",
    status="completed",
    created_after="2024-01-01",
    min_duration=3600,     # Runs longer than 1 hour
    max_duration=28800     # Runs shorter than 8 hours
)

# Search by parameters
de_analysis = client.search_executions(
    pipeline="RNA-seq",
    parameters={
        "aligner": "star",
        "perform_de": True
    }
)
Execution Search Filters
query SearchExecutions(
  $pipeline: ID
  $status: ExecutionStatus
  $owner: ID
  $dateRange: DateRangeInput
) {
  searchExecutions(
    pipeline: $pipeline
    status: $status
    owner: $owner
    dateRange: $dateRange
  ) {
    edges {
      node {
        id
        name
        status
        progress
        pipeline {
          name
        }
        duration
        created
        parameters
      }
    }
    stats {
      totalCount
      runningCount
      completedCount
      failedCount
      averageDuration
    }
  }
}
Full-Text Search
Search Syntax
Flow supports advanced search syntax:
Syntax | Example | Description |
---|---|---|
Exact phrase | "RNA sequencing" | Match exact phrase |
OR operator | RNA OR DNA | Match either term |
AND operator | mouse AND liver | Match both terms |
NOT operator | cancer NOT lung | Exclude term |
Wildcard | BRCA* | Match BRCA1, BRCA2, etc. |
Field search | organism:human | Search specific field |
Range | size:[1GB TO 10GB] | Numeric ranges |
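These operators can also be sent through the REST endpoint, but because they contain spaces, quotes, and brackets the query string must be URL-encoded. A sketch using requests, following the /search?q= form from the quick-search example above:

import requests
from urllib.parse import urlencode

query = '(organism:human OR organism:mouse) AND type:RNA*'
url = "https://api.flow.bio/search?" + urlencode({"q": query})

response = requests.get(url, headers={"Authorization": "Bearer <token>"})
response.raise_for_status()
print(response.json()["total_count"])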
Field-Specific Search
# Search specific fields
results = client.search_samples(
    query='name:"Sample_001" AND organism:human'
)

# Search metadata fields
results = client.search_samples(
    query='metadata.treatment:"drug_A" AND metadata.dose:[5 TO 15]'
)

# Complex queries
results = client.search(
    query='(organism:human OR organism:mouse) AND type:RNA* NOT project:test*'
)
Search with Facets
Get aggregated counts for filtering:
REST API with Facets
curl -H "Authorization: Bearer <token>" \
"https://api.flow.bio/search?q=cancer&facets=organism,sample_type,data_type"
Response with facets:
{
  "results": { ... },
  "facets": {
    "organism": [
      {"value": "human", "count": 145},
      {"value": "mouse", "count": 89},
      {"value": "rat", "count": 12}
    ],
    "sample_type": [
      {"value": "RNA", "count": 120},
      {"value": "DNA", "count": 95},
      {"value": "Protein", "count": 31}
    ],
    "data_type": [
      {"value": "FASTQ", "count": 180},
      {"value": "BAM", "count": 66}
    ]
  }
}
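Facet counts are typically used to drive a narrower follow-up query. A minimal sketch, assuming the Python client accepts a facets argument and returns counts in the shape shown above (both are assumptions); the follow-up call uses the documented organism filter on search_samples:

# Broad search with facet counts, then narrow to the most common organism
results = client.search("cancer", facets=["organism"])  # facets argument is assumed
top_organism = max(results["facets"]["organism"], key=lambda f: f["count"])
narrowed = client.search_samples(query="cancer", organism=top_organism["value"])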
GraphQL Faceted Search
query FacetedSearch($query: String!) {
  searchWithFacets(query: $query) {
    results {
      samples {
        id
        name
      }
    }
    facets {
      organisms {
        id
        name
        count
      }
      sampleTypes {
        id
        name
        count
      }
      years {
        year
        count
      }
    }
  }
}
Saved Searches
Save frequently used searches:
Create Saved Search
# Save a complex search
saved_search = client.create_saved_search(
    name="Active RNA-seq Samples",
    resource_type="sample",
    query="type:RNA AND has_data:true",
    filters={
        "organism": "human",
        "created_after": "2024-01-01"
    },
    is_public=False
)

# Use saved search
results = client.run_saved_search(saved_search.id)
Manage Saved Searches
# List your saved searches
my_searches = client.get_saved_searches()
# Share saved search
saved_search.share(groups=["lab-members"], permission="read")
# Update saved search
saved_search.update(
    filters={"organism": "mouse"}
)
Search Performance
Pagination Best Practices
# Bad - loading all results at once
all_samples = client.search_samples(query="RNA", limit=10000)
# Good - paginated iteration
def iterate_search_results(query, page_size=100):
    offset = 0
    while True:
        results = client.search_samples(
            query=query,
            limit=page_size,
            offset=offset
        )
        if not results:
            break
        for sample in results:
            yield sample
        offset += page_size

# Process results efficiently
for sample in iterate_search_results("RNA"):
    process_sample(sample)
Optimize Search Queries
# Use specific filters instead of text search when possible
# Slow - text search
samples = client.search_samples(query="human RNA-seq")
# Fast - filtered search
samples = client.search_samples(
    organism="human",
    sample_type="RNA"
)

# Use projections to reduce data transfer
# GraphQL - request only needed fields
query = """
query QuickSearch($query: String!) {
  searchSamples(query: $query) {
    edges {
      node {
        id
        name
        # Don't request unnecessary fields
      }
    }
  }
}
"""
Search Caching
from datetime import datetime, timedelta

class CachedSearchClient:
    """Wrap the Flow client with a short-lived in-memory search cache."""

    def __init__(self, client):
        self.client = client
        self._cache_duration = timedelta(minutes=5)
        self._cache = {}

    def search_samples(self, **kwargs):
        # Create cache key from search parameters
        cache_key = str(sorted(kwargs.items()))

        # Check cache
        if cache_key in self._cache:
            cached_result, cached_time = self._cache[cache_key]
            if datetime.now() - cached_time < self._cache_duration:
                return cached_result

        # Perform search
        results = self.client.search_samples(**kwargs)

        # Cache results
        self._cache[cache_key] = (results, datetime.now())
        return results
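The wrapper drops in wherever the plain client would be used; repeated identical searches within the five-minute window are served from memory:

cached = CachedSearchClient(client)
first = cached.search_samples(organism="human", sample_type="RNA")   # hits the API
second = cached.search_samples(organism="human", sample_type="RNA")  # returned from cache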
Advanced Search Patterns
Multi-Resource Search
def comprehensive_search(query, filters=None):
    """Search across all resource types with unified results"""
    filters = filters or {}
    results = {
        'samples': [],
        'projects': [],
        'executions': [],
        'data': []
    }

    # Search each resource type
    if 'sample' in filters.get('types', ['sample']):
        results['samples'] = client.search_samples(
            query=query,
            **filters.get('sample_filters', {})
        )
    if 'project' in filters.get('types', ['project']):
        results['projects'] = client.search_projects(
            query=query,
            **filters.get('project_filters', {})
        )

    # Combine and rank results
    all_results = []
    for resource_type, items in results.items():
        for item in items:
            all_results.append({
                'type': resource_type,
                'item': item,
                'score': calculate_relevance_score(item, query)
            })

    # Sort by relevance
    all_results.sort(key=lambda x: x['score'], reverse=True)
    return all_results
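comprehensive_search relies on a calculate_relevance_score helper that this guide does not define. A deliberately naive placeholder is sketched below; the heuristic and the attribute names (name, description) are assumptions about your result objects, so adapt it to whatever fields your client returns.

def calculate_relevance_score(item, query):
    # Naive heuristic (assumption): name matches outrank description matches
    q = query.lower()
    score = 0.0
    if q in (getattr(item, "name", "") or "").lower():
        score += 2.0
    if q in (getattr(item, "description", "") or "").lower():
        score += 1.0
    return score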
Hierarchical Search
def search_project_hierarchy(project_query, sample_query=None):
    """Search projects and their samples"""
    # Find matching projects
    projects = client.search_projects(query=project_query)

    results = []
    for project in projects:
        project_result = {
            'project': project,
            'matching_samples': []
        }

        # Search within project samples
        if sample_query:
            samples = client.search_samples(
                query=sample_query,
                project=project.id
            )
            project_result['matching_samples'] = samples

        results.append(project_result)

    return results
Metadata Search
def search_by_metadata(metadata_query):
    """Search using complex metadata queries"""
    # Build metadata filter
    filters = {}
    for key, value in metadata_query.items():
        if isinstance(value, dict):
            # Range query
            if 'min' in value or 'max' in value:
                filters[f'metadata.{key}'] = {
                    '$gte': value.get('min'),
                    '$lte': value.get('max')
                }
            # Array contains
            elif 'contains' in value:
                filters[f'metadata.{key}'] = {
                    '$contains': value['contains']
                }
        else:
            # Exact match
            filters[f'metadata.{key}'] = value

    return client.search_samples(metadata_filters=filters)

# Example usage
results = search_by_metadata({
    'age': {'min': 18, 'max': 65},
    'treatment': 'drug_A',
    'genes': {'contains': 'BRCA1'}
})
Search Export
Export Search Results
def export_search_results(query, format='csv'):
    """Export search results to file"""
    results = client.search_samples(query=query, limit=None)

    if format == 'csv':
        import csv
        with open('search_results.csv', 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=[
                'id', 'name', 'organism', 'type', 'project', 'created'
            ])
            writer.writeheader()
            for sample in results:
                writer.writerow({
                    'id': sample.id,
                    'name': sample.name,
                    'organism': sample.organism.name,
                    'type': sample.sample_type.name,
                    'project': sample.project.name if sample.project else '',
                    'created': sample.created
                })
    elif format == 'json':
        import json
        with open('search_results.json', 'w') as f:
            json.dump([
                {
                    'id': s.id,
                    'name': s.name,
                    'organism': s.organism.name,
                    'metadata': s.metadata
                }
                for s in results
            ], f, indent=2)
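For example, exporting every sample that matches a field query to JSON:

export_search_results('organism:human AND type:RNA', format='json')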
Search Security
Permission-Aware Search
All searches automatically filter by user permissions:
# Only returns samples user has access to
my_samples = client.search_samples(query="RNA-seq")
# Public search (no auth required)
public_projects = client.search_public_projects(query="cancer")
# Group-filtered search
group_samples = client.search_samples(
    query="experiment",
    shared_with_groups=["research-lab"]
)
Audit Search Access
import logging

# Assumes an application-level audit logger; adjust the logger name to your setup
audit_log = logging.getLogger("flow.search.audit")

# Log sensitive searches
def audited_search(query, user, search_type='samples'):
    """Perform search with audit logging"""
    # Log search attempt
    audit_log.info(f"User {user} searched {search_type}: {query}")

    try:
        results = client.search(query, resource_type=search_type)
        # Log results count
        audit_log.info(f"Search returned {len(results)} results")
        return results
    except Exception as e:
        audit_log.error(f"Search failed: {e}")
        raise
Search Tips
1. Use Specific Filters
# Slower - broad text search
results = client.search("human liver RNA-seq 2024")
# Faster - specific filters
results = client.search_samples(
    organism="human",
    metadata_filters={"tissue": "liver"},
    sample_type="RNA",
    created_after="2024-01-01"
)
2. Leverage Search Suggestions
# Get search suggestions
suggestions = client.get_search_suggestions(
    query="BRC",  # Partial query
    resource_type="sample"
)
# Returns: ["BRCA1", "BRCA2", "BRCC3", ...]
3. Use Search Templates
# Define reusable search templates
from datetime import datetime, timedelta

SEARCH_TEMPLATES = {
    "recent_rna_seq": {
        "sample_type": "RNA",
        "created_after": datetime.now() - timedelta(days=30),
        "has_data": True
    },
    "public_human": {
        "organism": "human",
        "is_public": True,
        "has_samples": True
    }
}

# Apply template
results = client.search_samples(
    query="cancer",
    **SEARCH_TEMPLATES["recent_rna_seq"]
)
Next Steps
- API Overview - Understanding Flow's APIs
- Permissions Guide - Search and permissions
- Python Client Guide - Search using Python
- GraphQL API - Advanced GraphQL searches