Utilities
This page documents utility functions and helper classes provided by Manas.
Text Processing
Tokenization
1
2
3
4
5
6
7
8
9
10
11
from manas_ai.tokenizers import count_tokens, split_by_tokens
# Count tokens in text
token_count = count_tokens("Your text here")
# Split text by token count
chunks = split_by_tokens(
text="Long text to split",
chunk_size=500,
overlap=50
)
Text Cleaning
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from manas_ai.utils.text import (
clean_text,
remove_extra_whitespace,
normalize_line_endings
)
# Clean text
cleaned = clean_text("Your messy\ttext\n\nhere")
# Remove extra whitespace
normalized = remove_extra_whitespace("Too    many     spaces")
# Normalize line endings
fixed = normalize_line_endings("Mixed\r\nline\nends")
Document Processing
Document Loading
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from manas_ai.utils.docs import load_document, detect_format
# Detect document format
format_info = detect_format("document.pdf")
# Load document with auto-detection
doc = load_document("document.pdf")
# Load with specific format
doc = load_document(
"document.txt",
format="text",
encoding="utf-8"
)
Document Chunking
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from manas_ai.utils.docs import chunk_document, merge_chunks
# Split document into chunks
chunks = chunk_document(
document,
chunk_size=500,
overlap=50,
strategy="sentence"
)
# Merge small chunks
merged = merge_chunks(
chunks,
min_size=200,
max_size=1000
)
Async Utilities
Concurrency Helpers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from manas_ai.utils.async_utils import (
run_concurrently,
with_timeout,
retry_async
)
# Run tasks concurrently
results = await run_concurrently(
tasks=[task1, task2, task3],
max_concurrency=3
)
# Add timeout to coroutine
result = await with_timeout(
coro=long_running_task(),
timeout=30
)
# Retry with backoff
result = await retry_async(
coro=flaky_operation(),
max_attempts=3,
backoff_base=2
)
Resource Management
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from manas_ai.utils.async_utils import (
AsyncResourceManager,
cleanup_resources
)
# Manage async resources
async with AsyncResourceManager() as resources:
    # Add resources
    await resources.add(resource1)
    await resources.add(resource2)
# Resources auto-cleanup after block
# Manual cleanup
await cleanup_resources([resource1, resource2])
Cache Management
Memory Cache
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from manas_ai.utils.cache import (
MemoryCache,
TTLCache,
LRUCache
)
# Simple memory cache
cache = MemoryCache()
cache.set("key", "value")
value = cache.get("key")
# Cache with TTL
cache = TTLCache(ttl=3600) # 1 hour
cache.set("key", "value")
# LRU cache with max size
cache = LRUCache(maxsize=1000)
cache.set("key", "value")
Disk Cache
1
2
3
4
5
6
7
8
9
10
11
12
13
from manas_ai.utils.cache import DiskCache
# Initialize disk cache
cache = DiskCache(
directory="cache",
ttl=86400, # 1 day
cleanup_interval=3600
)
# Cache operations
await cache.set("key", "value")
value = await cache.get("key")
await cache.delete("key")
Configuration Utilities
Environment Helpers
1
2
3
4
5
6
7
8
9
10
11
12
13
from manas_ai.utils.config import (
load_env,
get_env,
parse_bool
)
# Load .env file
load_env()
# Get environment variable with type
api_key = get_env("API_KEY", required=True)
debug = parse_bool(get_env("DEBUG", default="false"))
port = get_env("PORT", default="8080", cast=int)
Config Loading
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from manas_ai.utils.config import (
load_config,
merge_configs,
validate_config
)
# Load configuration
config = load_config("config.yml")
# Merge configurations
merged = merge_configs(default_config, user_config)
# Validate configuration
errors = validate_config(config, schema)
Logging Utilities
Logging Setup
1
2
3
4
5
6
7
8
9
from manas_ai.utils.logging import setup_logging
# Configure logging
setup_logging(
level="INFO",
format="detailed",
output="logs/app.log",
rotation="1 day"
)
Log Formatting
1
2
3
4
5
6
7
8
9
10
11
12
from manas_ai.utils.logging import (
format_error,
format_request,
format_response
)
# Format error for logging
error_log = format_error(error, include_trace=True)
# Format API request/response
req_log = format_request(request)
resp_log = format_response(response)
Type Utilities
Type Checking
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from manas_ai.utils.types import (
is_coroutine,
is_generator,
is_async_generator
)
# Check types
if is_coroutine(obj):
    result = await obj
if is_generator(obj):
    results = list(obj)
if is_async_generator(obj):
    async for item in obj:
        process(item)
Type Conversion
1
2
3
4
5
6
7
8
9
10
11
12
from manas_ai.utils.types import (
to_bool,
to_int,
to_float,
to_list
)
# Convert types safely
bool_val = to_bool("true") # True
int_val = to_int("123") # 123
float_val = to_float("12.34") # 12.34
list_val = to_list("a,b,c") # ["a", "b", "c"]
Debug Utilities
Performance Monitoring
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from manas_ai.utils.debug import (
timer,
memory_usage,
profile_function
)
# Measure execution time
with timer() as t:
    long_operation()
print(f"Took {t.elapsed:.2f}s")
# Monitor memory usage
with memory_usage() as mem:
    memory_intensive_operation()
print(f"Peak memory: {mem.peak_mb}MB")
# Profile function
stats = profile_function(target_function)
Debug Information
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from manas_ai.utils.debug import (
get_stack_trace,
object_info,
memory_snapshot
)
# Get stack trace
trace = get_stack_trace()
# Get object information
info = object_info(obj)
# Get memory snapshot
snapshot = memory_snapshot()
Best Practices
- Error Handling
  - Use appropriate error types
  - Handle edge cases
  - Provide context
  - Clean up resources
- Performance
  - Cache expensive operations
  - Use async where appropriate
  - Monitor resource usage
  - Clean up properly
- Type Safety
  - Use type hints
  - Validate inputs
  - Convert safely
  - Handle edge cases
- Configuration
  - Use environment variables
  - Validate configs
  - Provide defaults
  - Document options
- Debugging
  - Log appropriately
  - Monitor performance
  - Profile when needed
  - Clean up resources
Notes
- Handle errors gracefully
- Clean up resources
- Use type hints
- Document functions
- Test utilities
- Monitor performance