Python API Reference
Complete reference for using m1f programmatically in Python applications
The m1f tool can be used programmatically in Python applications, allowing you to integrate file bundling capabilities directly into your code.
Installation
# Clone the repository and change into the checkout
git clone https://github.com/rlichtenwalter/m1f.git
cd m1f
# Install the dependencies listed in requirements.txt
pip install -r requirements.txt
Module Import
# Import the m1f module
from tools.m1f import cli, core, config
# Or use the main entry point
from tools.m1f.cli import main
Basic Usage
Using the CLI Interface Programmatically
import sys
from tools.m1f.cli import main
# Populate sys.argv as if m1f had been started from a shell.
# Element 0 is the program name; the rest are grouped one option per line.
sys.argv = [
    'm1f',
    '-s', './src',
    '-o', 'output.txt',
    '--include-extensions', '.py', '.js',
]
# Hand control to the CLI entry point
main()
Using the Core API
from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig
from tools.m1f.constants import SeparatorStyle
# Describe the bundling job: where to read from, where to write to,
# which extensions to include and how to separate files in the output.
config = M1FConfig(
    source_directories=['./src'],
    output_file='output.txt',
    include_extensions=['.py', '.js'],
    separator_style=SeparatorStyle.MARKDOWN,
    verbose=True,
)
# Build a processor for that configuration and run it
processor = FileProcessor(config)
processor.process()
Configuration Options
The M1FConfig class accepts all the same parameters as the CLI:
from tools.m1f.config import M1FConfig
from tools.m1f.constants import SeparatorStyle, SecurityCheckMode
config = M1FConfig(
    # --- Required ---
    output_file='combined.txt',

    # --- Input sources ---
    source_directories=['./src', './docs'],
    input_file='files.txt',

    # --- File filtering ---
    include_extensions=['.py', '.js', '.md'],
    exclude_extensions=['.pyc', '.log'],
    excludes=['*.test.js', 'temp/*'],
    exclude_paths_file=['.gitignore'],
    include_paths_file=['important-files.txt'],

    # --- Processing options ---
    max_file_size='1MB',
    include_dot_paths=False,
    include_binary_files=False,
    include_symlinks=True,
    no_default_excludes=False,
    remove_scraped_metadata=True,

    # --- Output formatting ---
    separator_style=SeparatorStyle.DETAILED,
    line_ending='lf',
    add_timestamp=True,
    filename_mtime_hash=False,

    # --- Encoding ---
    convert_to_charset='utf-8',
    abort_on_encoding_error=False,
    no_prefer_utf8_for_text_files=False,

    # --- Security ---
    security_check=SecurityCheckMode.WARN,

    # --- Archive ---
    create_archive=True,
    archive_type='zip',

    # --- Output control ---
    force=True,
    minimal_output=False,
    skip_output_file=False,
    allow_duplicate_files=False,

    # --- Logging ---
    verbose=True,
    quiet=False,

    # --- Presets ---
    preset=['default.yml'],
    preset_group='production',
    disable_presets=False,
)
Advanced Usage
Custom File Processing
from tools.m1f.file_processor import FileProcessor
from tools.m1f.config import M1FConfig
class CustomFileProcessor(FileProcessor):
    """FileProcessor subclass that prepends a marker line to Python files."""

    def process_file_content(self, filepath, content):
        """Return *content*, with a header line added when *filepath* ends in ``.py``.

        Any other file is returned unchanged.
        """
        # Custom processing logic: only Python sources get the header
        if not filepath.endswith('.py'):
            return content
        return f"# Processed by m1f\n{content}"
# Configure a bundling job and run it through the customised processor
config = M1FConfig(
    source_directories=['./src'],
    output_file='output.txt',
)
processor = CustomFileProcessor(config)
processor.process()
Async Processing
The m1f tool uses async I/O for improved performance:
import asyncio
from tools.m1f.file_processor import async_read_file
async def process_files_async(file_paths):
    """Read every path in *file_paths* concurrently.

    One ``async_read_file`` call is started per path, and the gathered
    results are returned in the same order as *file_paths*.
    """
    pending_reads = (async_read_file(path) for path in file_paths)
    return await asyncio.gather(*pending_reads)
# Run async processing
file_paths = ['file1.py', 'file2.js', 'file3.md']
# asyncio.run() creates a fresh event loop, runs the coroutine to completion
# and closes the loop afterwards. It replaces the older
# get_event_loop()/run_until_complete() pair, which is deprecated when no
# loop is running and leaves the loop open.
contents = asyncio.run(process_files_async(file_paths))
Programmatic Preset Usage
from tools.m1f.config import M1FConfig, PresetLoader
# Read the preset definitions from a preset file
preset_loader = PresetLoader()
preset_config = preset_loader.load_preset_file('wordpress.m1f-presets.yml')
# Combine the loaded preset values with explicitly chosen settings.
# NOTE(review): assumes preset_config is a mapping of M1FConfig keyword
# arguments; a key that repeats one of the explicit arguments below would
# raise TypeError (duplicate keyword) - confirm against PresetLoader.
config = M1FConfig(
    source_directories=['./wp-content'],
    output_file='wordpress-bundle.txt',
    **preset_config,
)
Exception Handling
from tools.m1f.exceptions import (
M1FError,
FileNotFoundError,
PermissionError,
EncodingError,
ConfigurationError,
ValidationError,
SecurityError,
ArchiveError
)
# FileNotFoundError and PermissionError here are the names imported from
# tools.m1f.exceptions above, which replace the builtins of the same name in
# this module. The M1FError handler comes last so the more specific handlers
# get the first chance to match.
try:
    processor = FileProcessor(config)
    processor.process()
except FileNotFoundError as e:
    print(f"File not found: {e}")
except PermissionError as e:
    print(f"Permission denied: {e}")
except SecurityError as e:
    print(f"Security check failed: {e}")
except M1FError as e:
    print(f"General m1f error: {e}")
Integration Examples
Django Integration
# In Django management command
from django.core.management.base import BaseCommand
from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig
class Command(BaseCommand):
    """Django management command that bundles the project's app sources."""

    help = 'Bundle project files for documentation'

    def handle(self, *args, **options):
        """Bundle ``./apps`` into ``./docs/project-bundle.txt`` and report success."""
        bundle_config = M1FConfig(
            source_directories=['./apps'],
            output_file='./docs/project-bundle.txt',
            include_extensions=['.py', '.html', '.js'],
            exclude_paths_file=['.gitignore'],
        )
        FileProcessor(bundle_config).process()

        success_message = self.style.SUCCESS('Successfully created bundle')
        self.stdout.write(success_message)
Flask Integration
from flask import Flask, send_file
from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig
import tempfile
app = Flask(__name__)


@app.route('/generate-bundle')
def generate_bundle():
    """Bundle the application's sources and return the result as a download.

    The bundle is written into a private temporary directory, read back into
    memory, and served from an in-memory buffer. The directory and everything
    written into it are removed before the response is returned, so repeated
    requests do not accumulate temporary files.

    Returns:
        The Flask response produced by ``send_file``, offered to the client
        as the attachment ``app-bundle.txt``.
    """
    # Local imports keep this example self-contained.
    import io
    from pathlib import Path

    with tempfile.TemporaryDirectory() as workdir:
        # The output path does not exist yet and no handle is held open on it
        # while the bundle is being written.
        bundle_path = Path(workdir) / 'app-bundle.txt'
        config = M1FConfig(
            source_directories=['./app'],
            output_file=str(bundle_path),
            include_extensions=['.py', '.html', '.css', '.js'],
        )
        FileProcessor(config).process()
        # Read the result before the directory is cleaned up. This holds the
        # whole bundle in memory, which is fine for moderately sized projects.
        payload = bundle_path.read_bytes()

    return send_file(
        io.BytesIO(payload),
        as_attachment=True,
        download_name='app-bundle.txt',
    )
CI/CD Pipeline Integration
# build_bundle.py
import sys
from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig
from tools.m1f.constants import SecurityCheckMode
def create_release_bundle():
    """Create the release bundle and return a process exit code.

    Returns:
        0 when the bundle was created, 1 when bundling raised an exception.
        The failure message is written to stderr so CI logs keep diagnostics
        separate from normal output.
    """
    config = M1FConfig(
        source_directories=['./src'],
        output_file='./dist/release-bundle.txt',
        security_check=SecurityCheckMode.ERROR,  # Fail on secrets
        create_archive=True,
        archive_type='tar.gz',
        add_timestamp=True,
    )
    try:
        FileProcessor(config).process()
    except Exception as e:
        # Top-level boundary of a CI script: report any failure and turn it
        # into a non-zero exit status instead of a traceback.
        print(f"Bundle creation failed: {e}", file=sys.stderr)
        return 1
    print("Bundle created successfully")
    return 0


if __name__ == '__main__':
    sys.exit(create_release_bundle())
Module Architecture
The m1f package is organized into the following modules:
- cli.py - Command-line interface and argument parsing
- core.py - Main orchestration logic
- config.py - Configuration management and validation
- constants.py - Constants and enumerations
- exceptions.py - Custom exception classes
- file_processor.py - File handling with async I/O
- encoding_handler.py - Smart encoding detection
- security_scanner.py - Secret detection integration
- output_writer.py - Output file generation
- archive_creator.py - Archive creation functionality
- separator_generator.py - Separator formatting
- logging.py - Structured logging utilities
- utils.py - Utility functions
Best Practices
- Always handle exceptions - m1f provides specific exception types for different error scenarios
- Use configuration objects - Don’t modify sys.argv directly unless necessary
- Leverage async I/O - For large file sets, the async capabilities provide significant performance benefits
- Respect security checks - Don’t disable security scanning in production environments
- Test with small datasets first - Validate your configuration before processing large projects
See Also
- CLI Reference - Complete command-line options
- Configuration Guide - Detailed configuration documentation
- Preset System - File-specific processing rules
- Previous: Web Scraper Backends Guide
- Next: m1f-init Tool