Python API Reference

Complete reference for using m1f programmatically in Python applications.

The m1f tool can be used programmatically in Python applications, allowing you to integrate file bundling capabilities directly into your code.

Installation

# Clone the repository
git clone https://github.com/rlichtenwalter/m1f.git
cd m1f

# Install dependencies
pip install -r requirements.txt

Module Import

# Import the m1f module
from tools.m1f import cli, core, config

# Or use the main entry point
from tools.m1f.cli import main

Basic Usage

Using the CLI Interface Programmatically

import sys
from tools.m1f.cli import main

# Simulate command-line arguments
sys.argv = ['m1f', '-s', './src', '-o', 'output.txt', '--include-extensions', '.py', '.js']

# Run m1f
main()

Using the Core API

from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig
from tools.m1f.constants import SeparatorStyle

# Create configuration
config = M1FConfig(
    source_directories=['./src'],
    output_file='output.txt',
    include_extensions=['.py', '.js'],
    separator_style=SeparatorStyle.MARKDOWN,
    verbose=True
)

# Process files
processor = FileProcessor(config)
processor.process()

Configuration Options

The M1FConfig class accepts all the same parameters as the CLI:

from tools.m1f.config import M1FConfig
from tools.m1f.constants import SeparatorStyle, SecurityCheckMode

config = M1FConfig(
    # Required
    output_file='combined.txt',
    
    # Input sources
    source_directories=['./src', './docs'],
    input_file='files.txt',
    
    # File filtering
    include_extensions=['.py', '.js', '.md'],
    exclude_extensions=['.pyc', '.log'],
    excludes=['*.test.js', 'temp/*'],
    exclude_paths_file=['.gitignore'],
    include_paths_file=['important-files.txt'],
    
    # Processing options
    max_file_size='1MB',
    include_dot_paths=False,
    include_binary_files=False,
    include_symlinks=True,
    no_default_excludes=False,
    remove_scraped_metadata=True,
    
    # Output formatting
    separator_style=SeparatorStyle.DETAILED,
    line_ending='lf',
    add_timestamp=True,
    filename_mtime_hash=False,
    
    # Encoding
    convert_to_charset='utf-8',
    abort_on_encoding_error=False,
    no_prefer_utf8_for_text_files=False,
    
    # Security
    security_check=SecurityCheckMode.WARN,
    
    # Archive
    create_archive=True,
    archive_type='zip',
    
    # Output control
    force=True,
    minimal_output=False,
    skip_output_file=False,
    allow_duplicate_files=False,
    
    # Logging
    verbose=True,
    quiet=False,
    
    # Presets
    preset=['default.yml'],
    preset_group='production',
    disable_presets=False
)

Advanced Usage

Custom File Processing

from tools.m1f.file_processor import FileProcessor
from tools.m1f.config import M1FConfig

class CustomFileProcessor(FileProcessor):
    """FileProcessor variant that stamps a header line onto Python sources."""

    def process_file_content(self, filepath, content):
        """Return ``content``, prefixed with a marker line for ``.py`` files.

        Args:
            filepath: Path of the file being processed, given either as a
                ``str`` or as a path-like object such as ``pathlib.Path``.
            content: Text content of the file.

        Returns:
            ``content`` with ``# Processed by m1f`` prepended when the path
            ends in ``.py``; otherwise ``content`` unchanged.
        """
        # Coerce to str first: pathlib.Path objects have no ``endswith``
        # method, so calling it directly would fail for path-like arguments.
        if str(filepath).endswith('.py'):
            # Add custom header to Python files
            content = f"# Processed by m1f\n{content}"
        return content

# Use custom processor
config = M1FConfig(source_directories=['./src'], output_file='output.txt')
processor = CustomFileProcessor(config)
processor.process()

Async Processing

The m1f tool uses async I/O for improved performance:

import asyncio
from tools.m1f.file_processor import async_read_file

async def process_files_async(file_paths):
    """Read every path in ``file_paths`` with ``async_read_file``.

    One ``async_read_file`` call is created per path and all of them are
    awaited together through ``asyncio.gather``, which returns the results
    in the same order as the input paths.
    """
    pending_reads = (async_read_file(path) for path in file_paths)
    return await asyncio.gather(*pending_reads)

# Run async processing
file_paths = ['file1.py', 'file2.js', 'file3.md']
# asyncio.run() creates a fresh event loop, runs the coroutine to completion
# and closes the loop afterwards. It replaces the get_event_loop() /
# run_until_complete() pair, which is deprecated when no loop is running.
contents = asyncio.run(process_files_async(file_paths))

Programmatic Preset Usage

from tools.m1f.config import M1FConfig, PresetLoader

# Load presets programmatically
preset_loader = PresetLoader()
preset_config = preset_loader.load_preset_file('wordpress.m1f-presets.yml')

# Merge with custom config
config = M1FConfig(
    source_directories=['./wp-content'],
    output_file='wordpress-bundle.txt',
    **preset_config
)

Exception Handling

from tools.m1f.exceptions import (
    M1FError,
    FileNotFoundError,
    PermissionError,
    EncodingError,
    ConfigurationError,
    ValidationError,
    SecurityError,
    ArchiveError
)

# NOTE: FileNotFoundError and PermissionError below are the classes imported
# from tools.m1f.exceptions above; within this module they shadow the Python
# builtins of the same name.
try:
    processor = FileProcessor(config)
    processor.process()
except FileNotFoundError as e:
    print(f"File not found: {e}")
except PermissionError as e:
    print(f"Permission denied: {e}")
except SecurityError as e:
    print(f"Security check failed: {e}")
# Catch-all for any other m1f error. Presumably M1FError is the base class of
# the exceptions handled above, in which case this clause must stay last --
# verify against tools.m1f.exceptions.
except M1FError as e:
    print(f"General m1f error: {e}")

Integration Examples

Django Integration

# In Django management command
from django.core.management.base import BaseCommand
from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig

class Command(BaseCommand):
    """Management command that bundles the project's app sources with m1f."""

    help = 'Bundle project files for documentation'

    def handle(self, *args, **options):
        """Build the bundle configuration, run m1f, and report success."""
        bundle_config = M1FConfig(
            source_directories=['./apps'],
            output_file='./docs/project-bundle.txt',
            include_extensions=['.py', '.html', '.js'],
            exclude_paths_file=['.gitignore'],
        )
        FileProcessor(bundle_config).process()

        success_message = self.style.SUCCESS('Successfully created bundle')
        self.stdout.write(success_message)

Flask Integration

from flask import Flask, send_file
from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig
import tempfile

app = Flask(__name__)

@app.route('/generate-bundle')
def generate_bundle():
    """Bundle the ``./app`` sources with m1f and return the result as a download.

    Returns:
        The response produced by ``send_file`` for the generated bundle,
        offered to the client under the name ``app-bundle.txt``.
    """
    # Only the path is needed here: reserve a uniquely named file and close
    # our own handle right away, so that no extra open handle is held while
    # m1f writes the bundle and send_file reads it. delete=False keeps the
    # file on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as tmp:
        bundle_path = tmp.name

    config = M1FConfig(
        source_directories=['./app'],
        output_file=bundle_path,
        include_extensions=['.py', '.html', '.css', '.js']
    )

    processor = FileProcessor(config)
    processor.process()

    # NOTE(review): the temporary file is never removed after the response
    # is sent; consider scheduling cleanup if this endpoint is hit often.
    return send_file(bundle_path, as_attachment=True,
                     download_name='app-bundle.txt')

CI/CD Pipeline Integration

# build_bundle.py
import sys
from tools.m1f.core import FileProcessor
from tools.m1f.config import M1FConfig
from tools.m1f.constants import SecurityCheckMode

def create_release_bundle():
    """Build the release bundle and report the outcome as an exit code.

    Returns:
        0 when processing completed, 1 when processing raised an exception.
    """
    config = M1FConfig(
        source_directories=['./src'],
        output_file='./dist/release-bundle.txt',
        security_check=SecurityCheckMode.ERROR,  # Fail on secrets
        create_archive=True,
        archive_type='tar.gz',
        add_timestamp=True
    )

    try:
        processor = FileProcessor(config)
        processor.process()
    except Exception as e:
        # Top-level boundary of the build script: turn any failure into a
        # non-zero exit code, and report it on stderr so it is not mixed
        # into regular stdout output in CI logs.
        print(f"Bundle creation failed: {e}", file=sys.stderr)
        return 1
    else:
        print("Bundle created successfully")
        return 0

if __name__ == '__main__':
    sys.exit(create_release_bundle())

Module Architecture

The m1f package is organized into the following modules:

  • cli.py - Command-line interface and argument parsing
  • core.py - Main orchestration logic
  • config.py - Configuration management and validation
  • constants.py - Constants and enumerations
  • exceptions.py - Custom exception classes
  • file_processor.py - File handling with async I/O
  • encoding_handler.py - Smart encoding detection
  • security_scanner.py - Secret detection integration
  • output_writer.py - Output file generation
  • archive_creator.py - Archive creation functionality
  • separator_generator.py - Separator formatting
  • logging.py - Structured logging utilities
  • utils.py - Utility functions

Best Practices

  1. Always handle exceptions - m1f provides specific exception types for different error scenarios
  2. Use configuration objects - Build an M1FConfig and pass it to FileProcessor rather than overwriting sys.argv, unless you specifically need the CLI’s argument parsing
  3. Leverage async I/O - For large file sets, the async capabilities provide significant performance benefits
  4. Respect security checks - Don’t disable security scanning in production environments
  5. Test with small datasets first - Validate your configuration before processing large projects

See Also