Source from repo
Backtesting Frameworks

Implement and analyze trading strategy backtests using Python frameworks like Backtrader, Zipline, or VectorBT.
wshobson · GitHub: wshobson · Source repo · Original GitHub link · Publisher page
Files
Skill
n/a
Size
21.2 KB
Entrypoint
SKILL.md
Format
git-repo
Open file
references/details.md

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
markdown · 578 lines · Free
references/details.md
1# backtesting-frameworks — detailed worked examples
2 
3## Implementation Patterns
4 
5### Pattern 1: Event-Driven Backtester
6 
7```python
8from abc import ABC, abstractmethod
9from dataclasses import dataclass, field
10from datetime import datetime
11from decimal import Decimal
12from enum import Enum
13from typing import Dict, List, Optional
14import pandas as pd
15import numpy as np
16 
class OrderSide(Enum):
    """Direction of an order: buying or selling the instrument."""

    BUY = "buy"    # increases the signed position quantity
    SELL = "sell"  # decreases the signed position quantity
20 
class OrderType(Enum):
    """Supported order types; each member's value is its lowercase name."""

    MARKET = "market"
    LIMIT = "limit"
    STOP = "stop"
25 
@dataclass
class Order:
    """An instruction to trade ``quantity`` units of ``symbol``.

    The three trailing fields are optional and default to ``None``.
    """

    # Required fields
    symbol: str
    side: OrderSide
    quantity: Decimal
    order_type: OrderType

    # Optional fields
    limit_price: Optional[Decimal] = None
    stop_price: Optional[Decimal] = None
    timestamp: Optional[datetime] = None
35 
@dataclass
class Fill:
    """The execution report produced when an order is filled."""

    order: Order             # the order this fill belongs to
    fill_price: Decimal      # per-unit execution price
    fill_quantity: Decimal   # number of units executed
    commission: Decimal      # commission charged for this fill
    slippage: Decimal        # slippage amount recorded for this fill
    timestamp: datetime      # when the fill occurred
44 
@dataclass
class Position:
    """Signed holding in one symbol with average-cost accounting.

    ``quantity`` is positive for a long position and negative for a short
    one. ``avg_cost`` is the average entry price of the open quantity and
    is reset to zero whenever the position is flat. ``realized_pnl``
    accumulates profit and loss on quantity that has been closed.
    """

    symbol: str
    quantity: Decimal = Decimal("0")
    avg_cost: Decimal = Decimal("0")
    realized_pnl: Decimal = Decimal("0")

    def update(self, fill: Fill) -> None:
        """Apply ``fill`` to the position.

        A fill in the same direction as the open position (or on a flat
        position) re-averages the entry cost. A fill in the opposite
        direction realizes P&L on the closed quantity; if it is larger than
        the open quantity, the remainder opens a new position in the other
        direction at the fill price.
        """
        if fill.order.side == OrderSide.BUY:
            signed_qty = fill.fill_quantity
        else:
            signed_qty = -fill.fill_quantity

        old_qty = self.quantity
        new_qty = old_qty + signed_qty

        same_direction = (old_qty > 0) == (signed_qty > 0)
        if old_qty == 0 or same_direction:
            # Opening or adding: quantity-weighted average of entry prices.
            if new_qty != 0:
                self.avg_cost = (
                    abs(old_qty) * self.avg_cost
                    + abs(signed_qty) * fill.fill_price
                ) / abs(new_qty)
        else:
            # Reducing, closing, or flipping: only the quantity that was
            # actually open can realize P&L.
            closed_qty = min(abs(signed_qty), abs(old_qty))
            direction = 1 if old_qty > 0 else -1
            self.realized_pnl += (
                direction * closed_qty * (fill.fill_price - self.avg_cost)
            )
            if new_qty == 0:
                # Flat: drop the stale entry price.
                self.avg_cost = Decimal("0")
            elif (new_qty > 0) != (old_qty > 0):
                # Flipped through zero: the remainder was opened by this fill.
                self.avg_cost = fill.fill_price

        self.quantity = new_qty
64 
@dataclass
class Portfolio:
    """Cash balance plus a per-symbol table of positions."""

    cash: Decimal
    positions: Dict[str, Position] = field(default_factory=dict)

    def get_position(self, symbol: str) -> Position:
        """Return the position for ``symbol``, creating an empty one if absent."""
        try:
            return self.positions[symbol]
        except KeyError:
            created = Position(symbol=symbol)
            self.positions[symbol] = created
            return created

    def process_fill(self, fill: Fill) -> None:
        """Update the affected position, then move cash for the fill.

        Buys deduct notional plus commission; sells add notional minus
        commission.
        """
        self.get_position(fill.order.symbol).update(fill)

        notional = fill.fill_price * fill.fill_quantity
        if fill.order.side == OrderSide.BUY:
            self.cash -= notional + fill.commission
        else:
            self.cash += notional - fill.commission

    def get_equity(self, prices: Dict[str, Decimal]) -> Decimal:
        """Return cash plus the market value of the open positions.

        Positions that are flat, or whose symbol is missing from ``prices``,
        contribute nothing.
        """
        market_values = (
            held.quantity * prices[name]
            for name, held in self.positions.items()
            if held.quantity != 0 and name in prices
        )
        # Starting the sum at cash keeps the same left-to-right accumulation.
        return sum(market_values, self.cash)
90 
class Strategy(ABC):
    """Interface every strategy must implement to be driven by a backtest."""

    @abstractmethod
    def on_bar(self, timestamp: datetime, data: pd.DataFrame) -> List[Order]:
        """Return the orders to submit after seeing ``data`` at ``timestamp``."""

    @abstractmethod
    def on_fill(self, fill: Fill) -> None:
        """Receive notification that one of the strategy's orders was filled."""
99 
class ExecutionModel(ABC):
    """Interface for turning an order and a price bar into a fill."""

    @abstractmethod
    def execute(self, order: Order, bar: pd.Series) -> Optional[Fill]:
        """Return the fill for ``order`` against ``bar``, or ``None`` if unfilled."""
104 
class SimpleExecutionModel(ExecutionModel):
    """Fills market orders at the bar's open, adjusted for slippage.

    Args:
        slippage_bps: Slippage in basis points applied to the open price.
        commission_per_share: Commission charged per unit of quantity.
    """

    def __init__(self, slippage_bps: float = 10, commission_per_share: float = 0.01):
        self.slippage_bps = slippage_bps
        self.commission_per_share = commission_per_share

    def execute(self, order: Order, bar: pd.Series) -> Optional[Fill]:
        """Fill a market order against ``bar``; return ``None`` for other types.

        Buys pay the open multiplied by the slippage factor; sells receive
        the open divided by it. The fill is stamped with ``bar.name``.
        """
        if order.order_type != OrderType.MARKET:
            return None

        reference_price = Decimal(str(bar["open"]))
        slip_factor = Decimal(str(1 + (self.slippage_bps / 10000)))

        # Slippage always moves the price against the trader.
        if order.side == OrderSide.BUY:
            executed_price = reference_price * slip_factor
        else:
            executed_price = reference_price / slip_factor

        per_share_fee = Decimal(str(self.commission_per_share))
        total_commission = order.quantity * per_share_fee
        slippage_cost = abs(executed_price - reference_price) * order.quantity

        return Fill(
            order=order,
            fill_price=executed_price,
            fill_quantity=order.quantity,
            commission=total_commission,
            slippage=slippage_cost,
            timestamp=bar.name,
        )
133 
class Backtester:
    """Event-driven backtest loop over a single instrument's bars.

    Orders returned by the strategy on one bar are handed to the execution
    model on the following bar. Orders the execution model does not fill on
    that bar are discarded.

    Args:
        strategy: Strategy that generates orders and receives fills.
        execution_model: Model that converts orders into fills.
        initial_capital: Starting cash for the portfolio.
    """

    def __init__(
        self,
        strategy: Strategy,
        execution_model: ExecutionModel,
        initial_capital: Decimal = Decimal("100000")
    ):
        self.strategy = strategy
        self.execution_model = execution_model
        self.portfolio = Portfolio(cash=initial_capital)
        self.equity_curve: List[tuple] = []
        self.trades: List[Fill] = []

    def run(self, data: pd.DataFrame) -> pd.DataFrame:
        """Run backtest on OHLCV data with DatetimeIndex.

        Args:
            data: Bars for one instrument; the loop reads the ``close``
                column and passes each row to the execution model.

        Returns:
            DataFrame indexed by timestamp with ``equity`` and ``returns``.
        """
        pending_orders: List[Order] = []

        for timestamp, bar in data.iterrows():
            # Execute the orders generated on the previous bar.
            for order in pending_orders:
                fill = self.execution_model.execute(order, bar)
                if fill is not None:
                    self.portfolio.process_fill(fill)
                    self.strategy.on_fill(fill)
                    self.trades.append(fill)

            pending_orders.clear()

            # Mark every held symbol at this bar's close. All orders execute
            # against this one data set, so its close is the only price
            # available; keying by the symbols actually held ensures open
            # positions are included in equity.
            close_price = Decimal(str(bar["close"]))
            prices = {symbol: close_price for symbol in self.portfolio.positions}
            equity = self.portfolio.get_equity(prices)
            self.equity_curve.append((timestamp, float(equity)))

            # Generate new orders for the next bar; the strategy only sees
            # rows up to and including the current timestamp.
            new_orders = self.strategy.on_bar(timestamp, data.loc[:timestamp])
            # Tolerate strategies that return None when they have no orders.
            pending_orders.extend(new_orders or [])

        return self._create_results()

    def _create_results(self) -> pd.DataFrame:
        """Build the equity/returns frame from the recorded equity curve."""
        equity_df = pd.DataFrame(self.equity_curve, columns=["timestamp", "equity"])
        equity_df.set_index("timestamp", inplace=True)
        equity_df["returns"] = equity_df["equity"].pct_change()
        return equity_df
178```
179 
180### Pattern 2: Vectorized Backtester (Fast)
181 
182```python
183import pandas as pd
184import numpy as np
185from typing import Callable, Dict, Any
186 
class VectorizedBacktester:
    """Fast vectorized backtester for simple strategies.

    Args:
        initial_capital: Starting equity.
        commission: Proportional commission charged per unit of position change.
        slippage: Proportional slippage charged per unit of position change.
    """

    def __init__(
        self,
        initial_capital: float = 100000,
        commission: float = 0.001,  # 0.1%
        slippage: float = 0.0005   # 0.05%
    ):
        self.initial_capital = initial_capital
        self.commission = commission
        self.slippage = slippage

    def run(
        self,
        prices: pd.DataFrame,
        signal_func: Callable[[pd.DataFrame], pd.Series]
    ) -> Dict[str, Any]:
        """
        Run backtest with signal function.

        Args:
            prices: DataFrame with 'close' column
            signal_func: Function that returns position signals (-1, 0, 1)

        Returns:
            Dictionary with keys "equity", "returns", "signals", "metrics"
        """
        # Generate signals (shifted one bar to avoid look-ahead)
        signals = signal_func(prices).shift(1).fillna(0)

        # The first bar has no previous close; treat it as a zero return so
        # the equity curve starts at initial_capital instead of NaN.
        returns = prices["close"].pct_change().fillna(0)

        # The shifted signal always starts at 0, so the undefined first diff
        # represents "no change" and carries no cost.
        position_changes = signals.diff().abs().fillna(0)
        trading_costs = position_changes * (self.commission + self.slippage)

        strategy_returns = signals * returns - trading_costs

        # Build equity curve
        equity = (1 + strategy_returns).cumprod() * self.initial_capital

        return {
            "equity": equity,
            "returns": strategy_returns,
            "signals": signals,
            "metrics": self._calculate_metrics(
                strategy_returns, equity, position_changes
            )
        }

    def _calculate_metrics(
        self,
        returns: pd.Series,
        equity: pd.Series,
        position_changes: pd.Series = None
    ) -> Dict[str, float]:
        """Calculate performance metrics.

        Args:
            returns: Per-bar strategy returns.
            equity: Equity curve built from ``returns``.
            position_changes: Absolute per-bar change in position. When
                given, "num_trades" counts the bars on which the position
                changed; when omitted, it falls back to counting bars with
                a non-zero return.

        Returns:
            Dictionary of metrics; all zeros when ``equity`` is empty.
        """
        if len(equity) == 0:
            return {
                "total_return": 0.0,
                "annual_return": 0.0,
                "annual_volatility": 0.0,
                "sharpe_ratio": 0.0,
                "max_drawdown": 0.0,
                "win_rate": 0.0,
                "num_trades": 0
            }

        total_return = (equity.iloc[-1] / self.initial_capital) - 1
        annual_return = (1 + total_return) ** (252 / len(returns)) - 1
        annual_vol = returns.std() * np.sqrt(252)
        sharpe = annual_return / annual_vol if annual_vol > 0 else 0

        # Drawdown
        rolling_max = equity.cummax()
        drawdown = (equity - rolling_max) / rolling_max
        max_drawdown = drawdown.min()

        # Win rate over bars that actually had a non-zero return
        winning_days = (returns > 0).sum()
        total_days = (returns != 0).sum()
        win_rate = winning_days / total_days if total_days > 0 else 0

        if position_changes is None:
            num_trades = int(total_days)
        else:
            num_trades = int((position_changes != 0).sum())

        return {
            "total_return": total_return,
            "annual_return": annual_return,
            "annual_volatility": annual_vol,
            "sharpe_ratio": sharpe,
            "max_drawdown": max_drawdown,
            "win_rate": win_rate,
            "num_trades": num_trades
        }
270 
271# Example usage
def momentum_signal(prices: pd.DataFrame, lookback: int = 20) -> pd.Series:
    """Return 1 where close is above its ``lookback``-bar SMA, otherwise 0.

    Bars without a full lookback window have no average and yield 0.
    """
    close = prices["close"]
    moving_average = close.rolling(window=lookback).mean()
    is_above = close.gt(moving_average)
    return is_above.astype(int)
276 
277# Run backtest
278# backtester = VectorizedBacktester()
279# results = backtester.run(price_data, lambda p: momentum_signal(p, 50))
280```
281 
282### Pattern 3: Walk-Forward Optimization
283 
284```python
285from typing import Callable, Dict, List, Tuple, Any
286import pandas as pd
287import numpy as np
288from itertools import product
289 
class WalkForwardOptimizer:
    """Walk-forward analysis with anchored or rolling windows."""

    def __init__(
        self,
        train_period: int,
        test_period: int,
        anchored: bool = False,
        n_splits: int = None
    ):
        """
        Args:
            train_period: Number of bars in training window
            test_period: Number of bars in test window
            anchored: If True, training always starts from beginning
            n_splits: Number of train/test splits (auto-calculated if None)

        Raises:
            ValueError: If a period is not positive or n_splits is negative.
        """
        # Non-positive periods or a negative n_splits would make the window
        # never advance in generate_splits.
        if train_period <= 0 or test_period <= 0:
            raise ValueError("train_period and test_period must be positive")
        if n_splits is not None and n_splits < 0:
            raise ValueError("n_splits must not be negative")

        self.train_period = train_period
        self.test_period = test_period
        self.anchored = anchored
        self.n_splits = n_splits

    def generate_splits(
        self,
        data: pd.DataFrame
    ) -> List[Tuple[pd.DataFrame, pd.DataFrame]]:
        """Generate train/test splits.

        Returns:
            List of (train_data, test_data) pairs in chronological order;
            empty when data is shorter than train_period + test_period.
        """
        splits = []
        n = len(data)

        if self.n_splits:
            # Floor division can yield 0 (or a negative number for short
            # data); clamp to 1 so the window always advances.
            step = max(1, (n - self.train_period) // self.n_splits)
        else:
            step = self.test_period

        start = 0
        while start + self.train_period + self.test_period <= n:
            # Rounding the step down can leave room for extra windows; never
            # return more than the requested number of splits.
            if self.n_splits and len(splits) >= self.n_splits:
                break

            train_start = 0 if self.anchored else start
            train_end = start + self.train_period
            test_end = min(train_end + self.test_period, n)

            train_data = data.iloc[train_start:train_end]
            test_data = data.iloc[train_end:test_end]

            splits.append((train_data, test_data))
            start += step

        return splits

    def optimize(
        self,
        data: pd.DataFrame,
        strategy_func: Callable,
        param_grid: Dict[str, List],
        metric: str = "sharpe_ratio"
    ) -> Dict[str, Any]:
        """
        Run walk-forward optimization.

        Args:
            data: Full dataset
            strategy_func: Function(data, **params) -> results dict with
                "metrics" and "equity" entries
            param_grid: Parameter combinations to test
            metric: Metric to optimize

        Returns:
            Dictionary with "split_results", "param_history" and
            "combined_equity" from all test periods
        """
        splits = self.generate_splits(data)
        all_results = []
        optimal_params_history = []

        for i, (train_data, test_data) in enumerate(splits):
            # Optimize on training data
            best_params, best_metric = self._grid_search(
                train_data, strategy_func, param_grid, metric
            )
            optimal_params_history.append(best_params)

            # Test with optimal params; the result dict is tagged in place
            test_results = strategy_func(test_data, **best_params)
            test_results["split"] = i
            test_results["params"] = best_params
            all_results.append(test_results)

            print(f"Split {i+1}/{len(splits)}: "
                  f"Best {metric}={best_metric:.4f}, params={best_params}")

        return {
            "split_results": all_results,
            "param_history": optimal_params_history,
            "combined_equity": self._combine_equity_curves(all_results)
        }

    def _grid_search(
        self,
        data: pd.DataFrame,
        strategy_func: Callable,
        param_grid: Dict[str, List],
        metric: str
    ) -> Tuple[Dict, float]:
        """Grid search for best parameters.

        Returns:
            (best_params, best_metric). If no combination scores above
            -inf (for example every score is NaN), the first combination is
            returned so callers always receive a usable parameter dict.

        Raises:
            ValueError: If param_grid yields no combinations (a parameter
                has an empty list of values).
        """
        best_params = None
        best_metric = -np.inf
        first_params = None

        # Generate all parameter combinations
        param_names = list(param_grid.keys())
        param_values = list(param_grid.values())

        for values in product(*param_values):
            params = dict(zip(param_names, values))
            if first_params is None:
                first_params = params
            results = strategy_func(data, **params)

            score = results["metrics"][metric]
            # NaN compares False here, so NaN scores never win.
            if score > best_metric:
                best_metric = score
                best_params = params

        if first_params is None:
            raise ValueError("param_grid produced no parameter combinations")
        if best_params is None:
            best_params = first_params

        return best_params, best_metric

    def _combine_equity_curves(
        self,
        results: List[Dict]
    ) -> pd.Series:
        """Concatenate the equity curves from all test periods.

        The curves are joined as-is; they are not rescaled to continue from
        the previous split's ending equity. Returns an empty Series when
        there are no results.
        """
        if not results:
            return pd.Series(dtype=float)
        return pd.concat([r["equity"] for r in results])
420```
421 
422### Pattern 4: Monte Carlo Analysis
423 
424```python
425import numpy as np
426import pandas as pd
427from typing import Dict, List
428 
class MonteCarloAnalyzer:
    """Monte Carlo simulation for strategy robustness.

    Args:
        n_simulations: Number of bootstrap paths to draw.
        confidence: Confidence level used for tail and interval statistics.
    """

    def __init__(self, n_simulations: int = 1000, confidence: float = 0.95):
        self.n_simulations = n_simulations
        self.confidence = confidence

    @staticmethod
    def _clean_returns(returns: pd.Series) -> np.ndarray:
        """Return the non-NaN observations of ``returns`` as a float array."""
        values = np.asarray(returns, dtype=float)
        values = values[~np.isnan(values)]
        if values.size == 0:
            raise ValueError(
                "returns must contain at least one non-NaN observation"
            )
        return values

    def bootstrap_returns(
        self,
        returns: pd.Series,
        n_periods: int = None
    ) -> np.ndarray:
        """
        Bootstrap simulation by resampling returns.

        NaN observations (such as the first row produced by ``pct_change``)
        are dropped before resampling so they cannot turn a simulated path
        into NaN.

        Args:
            returns: Historical returns series
            n_periods: Length of each simulation (default: number of
                non-NaN observations in ``returns``)

        Returns:
            Array of shape (n_simulations, n_periods)

        Raises:
            ValueError: If ``returns`` has no non-NaN observations.
        """
        sample = self._clean_returns(returns)
        if n_periods is None:
            n_periods = len(sample)

        simulations = np.zeros((self.n_simulations, n_periods))

        # One draw per path, so a fixed np.random seed yields the same
        # sequence of per-path calls on every run.
        for i in range(self.n_simulations):
            simulations[i] = np.random.choice(
                sample,
                size=n_periods,
                replace=True
            )

        return simulations

    def analyze_drawdowns(
        self,
        returns: pd.Series
    ) -> Dict[str, float]:
        """Analyze drawdown distribution via simulation.

        Returns:
            Mean, median, confidence-tail percentile, and minimum of the
            simulated maximum drawdowns (drawdowns are non-positive).
        """
        simulations = self.bootstrap_returns(returns)

        # Vectorized over paths: each row is one simulated equity curve.
        equity = (1 + simulations).cumprod(axis=1)
        rolling_max = np.maximum.accumulate(equity, axis=1)
        drawdowns = (equity - rolling_max) / rolling_max
        max_drawdowns = drawdowns.min(axis=1)

        return {
            "expected_max_dd": np.mean(max_drawdowns),
            "median_max_dd": np.median(max_drawdowns),
            f"worst_{int(self.confidence*100)}pct": np.percentile(
                max_drawdowns, (1 - self.confidence) * 100
            ),
            "worst_case": max_drawdowns.min()
        }

    def probability_of_loss(
        self,
        returns: pd.Series,
        holding_periods: List[int] = None
    ) -> Dict[int, float]:
        """Calculate probability of loss over various holding periods.

        Args:
            returns: Historical returns series
            holding_periods: Horizons in bars; defaults to
                [21, 63, 126, 252]. Horizons longer than ``returns`` are
                skipped.

        Returns:
            Mapping of holding period to the fraction of simulated paths
            whose compounded return is negative.
        """
        # None sentinel instead of a shared mutable default list.
        if holding_periods is None:
            holding_periods = [21, 63, 126, 252]

        results = {}

        for period in holding_periods:
            if period > len(returns):
                continue

            simulations = self.bootstrap_returns(returns, period)
            total_returns = (1 + simulations).prod(axis=1) - 1
            results[period] = (total_returns < 0).mean()

        return results

    def confidence_interval(
        self,
        returns: pd.Series,
        periods: int = 252
    ) -> Dict[str, float]:
        """Calculate confidence interval for future returns.

        Returns:
            Mean, two-sided percentile bounds at the configured confidence
            level, and standard deviation of the simulated compounded
            returns over ``periods`` bars.
        """
        simulations = self.bootstrap_returns(returns, periods)
        total_returns = (1 + simulations).prod(axis=1) - 1

        lower = (1 - self.confidence) / 2
        upper = 1 - lower

        return {
            "expected": total_returns.mean(),
            "lower_bound": np.percentile(total_returns, lower * 100),
            "upper_bound": np.percentile(total_returns, upper * 100),
            "std": total_returns.std()
        }
529```
530 
531## Performance Metrics
532 
533```python
def calculate_metrics(
    returns: pd.Series,
    rf_rate: float = 0.02,
    ann_factor: int = 252
) -> Dict[str, float]:
    """Calculate comprehensive performance metrics.

    Args:
        returns: Per-period strategy returns. NaN rows are dropped first so
            they do not count as periods.
        rf_rate: Annual risk-free rate subtracted in Sharpe and Sortino.
        ann_factor: Periods per year used to annualize (252 assumes daily
            returns).

    Returns:
        Dictionary of metrics. Every value is zero when ``returns`` has no
        non-NaN observations.
    """
    returns = returns.dropna()
    n_obs = len(returns)

    if n_obs == 0:
        # Nothing to measure; avoid dividing by a zero period count.
        return {
            "total_return": 0.0,
            "annual_return": 0.0,
            "annual_volatility": 0.0,
            "sharpe_ratio": 0.0,
            "sortino_ratio": 0.0,
            "calmar_ratio": 0.0,
            "max_drawdown": 0.0,
            "win_rate": 0.0,
            "profit_factor": 0.0,
            "num_trades": 0
        }

    # Basic metrics
    total_return = (1 + returns).prod() - 1
    growth = 1 + total_return
    if growth > 0:
        annual_return = growth ** (ann_factor / n_obs) - 1
    else:
        # Capital fully lost: a fractional power of a non-positive base is
        # undefined, so report a total loss instead of NaN.
        annual_return = -1.0
    annual_vol = returns.std() * np.sqrt(ann_factor)

    # Risk-adjusted returns
    sharpe = (annual_return - rf_rate) / annual_vol if annual_vol > 0 else 0

    # Sortino: downside deviation taken as the std of the negative returns
    downside_returns = returns[returns < 0]
    downside_vol = downside_returns.std() * np.sqrt(ann_factor)
    sortino = (annual_return - rf_rate) / downside_vol if downside_vol > 0 else 0

    # Calmar ratio
    equity = (1 + returns).cumprod()
    rolling_max = equity.cummax()
    drawdowns = (equity - rolling_max) / rolling_max
    max_drawdown = drawdowns.min()
    calmar = annual_return / abs(max_drawdown) if max_drawdown != 0 else 0

    # Win rate and profit factor over periods with a non-zero return
    wins = returns[returns > 0]
    active_periods = int((returns != 0).sum())
    win_rate = len(wins) / active_periods if active_periods > 0 else 0
    total_losses = downside_returns.sum()
    profit_factor = wins.sum() / abs(total_losses) if total_losses != 0 else np.inf

    return {
        "total_return": total_return,
        "annual_return": annual_return,
        "annual_volatility": annual_vol,
        "sharpe_ratio": sharpe,
        "sortino_ratio": sortino,
        "calmar_ratio": calmar,
        "max_drawdown": max_drawdown,
        "win_rate": win_rate,
        "profit_factor": profit_factor,
        "num_trades": active_periods
    }
577```
578
Preparing the source view

Backtesting Frameworks

references/details.md