Since v0.1.0 · Stable · Core

Utilities

This page documents utility functions and helper classes provided by Manas.

Text Processing

Tokenization

from manas_ai.tokenizers import count_tokens, split_by_tokens

# Count tokens in text
token_count = count_tokens("Your text here")

# Split text by token count
chunks = split_by_tokens(
    text="Long text to split",
    chunk_size=500,
    overlap=50
)

Text Cleaning

from manas_ai.utils.text import (
    clean_text,
    remove_extra_whitespace,
    normalize_line_endings
)

# Clean text
cleaned = clean_text("Your  messy\ttext\n\nhere")

# Remove extra whitespace
normalized = remove_extra_whitespace("Too   many    spaces")

# Normalize line endings
fixed = normalize_line_endings("Mixed\r\nline\nends")

Document Processing

Document Loading

from manas_ai.utils.docs import load_document, detect_format

# Detect document format
format_info = detect_format("document.pdf")

# Load document with auto-detection
doc = load_document("document.pdf")

# Load with specific format
doc = load_document(
    "document.txt",
    format="text",
    encoding="utf-8"
)

Document Chunking

from manas_ai.utils.docs import chunk_document, merge_chunks

# Split document into chunks
chunks = chunk_document(
    document,
    chunk_size=500,
    overlap=50,
    strategy="sentence"
)

# Merge small chunks
merged = merge_chunks(
    chunks,
    min_size=200,
    max_size=1000
)

Async Utilities

Concurrency Helpers

from manas_ai.utils.async_utils import (
    run_concurrently,
    with_timeout,
    retry_async
)

# Run tasks concurrently
results = await run_concurrently(
    tasks=[task1, task2, task3],
    max_concurrency=3
)

# Add timeout to coroutine
result = await with_timeout(
    coro=long_running_task(),
    timeout=30
)

# Retry with backoff
result = await retry_async(
    coro=flaky_operation(),
    max_attempts=3,
    backoff_base=2
)

Resource Management

from manas_ai.utils.async_utils import (
    AsyncResourceManager,
    cleanup_resources
)

# Manage async resources
async with AsyncResourceManager() as resources:
    # Add resources
    await resources.add(resource1)
    await resources.add(resource2)
    
    # Resources auto-cleanup after block

# Manual cleanup
await cleanup_resources([resource1, resource2])

Cache Management

Memory Cache

from manas_ai.utils.cache import (
    MemoryCache,
    TTLCache,
    LRUCache
)

# Simple memory cache
cache = MemoryCache()
cache.set("key", "value")
value = cache.get("key")

# Cache with TTL
cache = TTLCache(ttl=3600)  # 1 hour
cache.set("key", "value")

# LRU cache with max size
cache = LRUCache(maxsize=1000)
cache.set("key", "value")

Disk Cache

from manas_ai.utils.cache import DiskCache

# Initialize disk cache
cache = DiskCache(
    directory="cache",
    ttl=86400,  # 1 day
    cleanup_interval=3600
)

# Cache operations
await cache.set("key", "value")
value = await cache.get("key")
await cache.delete("key")

Configuration Utilities

Environment Helpers

from manas_ai.utils.config import (
    load_env,
    get_env,
    parse_bool
)

# Load .env file
load_env()

# Get environment variable with type
api_key = get_env("API_KEY", required=True)
debug = parse_bool(get_env("DEBUG", default="false"))
port = get_env("PORT", default="8080", cast=int)

Config Loading

from manas_ai.utils.config import (
    load_config,
    merge_configs,
    validate_config
)

# Load configuration
config = load_config("config.yml")

# Merge configurations
merged = merge_configs(default_config, user_config)

# Validate configuration
errors = validate_config(config, schema)

Logging Utilities

Logging Setup

from manas_ai.utils.logging import setup_logging

# Configure logging
setup_logging(
    level="INFO",
    format="detailed",
    output="logs/app.log",
    rotation="1 day"
)

Log Formatting

from manas_ai.utils.logging import (
    format_error,
    format_request,
    format_response
)

# Format error for logging
error_log = format_error(error, include_trace=True)

# Format API request/response
req_log = format_request(request)
resp_log = format_response(response)

Type Utilities

Type Checking

from manas_ai.utils.types import (
    is_coroutine,
    is_generator,
    is_async_generator
)

# Check types
if is_coroutine(obj):
    result = await obj
    
if is_generator(obj):
    results = list(obj)
    
if is_async_generator(obj):
    async for item in obj:
        process(item)

Type Conversion

from manas_ai.utils.types import (
    to_bool,
    to_int,
    to_float,
    to_list
)

# Convert types safely
bool_val = to_bool("true")     # True
int_val = to_int("123")        # 123
float_val = to_float("12.34")  # 12.34
list_val = to_list("a,b,c")    # ["a", "b", "c"]

Debug Utilities

Performance Monitoring

from manas_ai.utils.debug import (
    timer,
    memory_usage,
    profile_function
)

# Measure execution time
with timer() as t:
    long_operation()
print(f"Took {t.elapsed:.2f}s")

# Monitor memory usage
with memory_usage() as mem:
    memory_intensive_operation()
print(f"Peak memory: {mem.peak_mb}MB")

# Profile function
stats = profile_function(target_function)

Debug Information

from manas_ai.utils.debug import (
    get_stack_trace,
    object_info,
    memory_snapshot
)

# Get stack trace
trace = get_stack_trace()

# Get object information
info = object_info(obj)

# Get memory snapshot
snapshot = memory_snapshot()

Best Practices

1. **Error Handling**
   - Use appropriate error types
   - Handle edge cases
   - Provide context
   - Clean up resources

2. **Performance**
   - Cache expensive operations
   - Use async where appropriate
   - Monitor resource usage
   - Clean up properly

3. **Type Safety**
   - Use type hints
   - Validate inputs
   - Convert safely
   - Handle edge cases

4. **Configuration**
   - Use environment variables
   - Validate configs
   - Provide defaults
   - Document options

5. **Debugging**
   - Log appropriately
   - Monitor performance
   - Profile when needed
   - Clean up resources

Notes

- Handle errors gracefully
- Clean up resources
- Use type hints
- Document functions
- Test utilities
- Monitor performance

On this page

- [Text Processing](#text-processing)
- Document Processing
  - [Document Loading](#document-loading)
  - [Document Chunking](#document-chunking)
- [Async Utilities](#async-utilities)
  - [Concurrency Helpers](#concurrency-helpers)
  - [Resource Management](#resource-management)
- [Cache Management](#cache-management)
  - [Memory Cache](#memory-cache)
  - [Disk Cache](#disk-cache)
- [Configuration Utilities](#configuration-utilities)
  - [Environment Helpers](#environment-helpers)
  - [Config Loading](#config-loading)
- [Logging Utilities](#logging-utilities)
  - [Logging Setup](#logging-setup)
  - [Log Formatting](#log-formatting)
- [Type Utilities](#type-utilities)
  - [Type Checking](#type-checking)
  - [Type Conversion](#type-conversion)
- [Debug Utilities](#debug-utilities)
  - [Performance Monitoring](#performance-monitoring)
  - [Debug Information](#debug-information)
- [Best Practices](#best-practices)
- [Notes](#notes)