Chapter 4: Sample Weights¶
AFML Ch. 4 -- Handling overlapping labels with uniqueness-aware sampling.
When labels overlap in time (e.g., a 20-day holding period event starting on day 5 overlaps with one starting on day 10), the samples are not independent. Standard bootstrap and cross-validation methods break down because they assume i.i.d. samples. This chapter introduces techniques to measure and correct for this overlap.
Topics covered:
- Concurrent events counting
- Average uniqueness of labels
- Indicator matrix construction
- Sequential bootstrap vs standard bootstrap
- Return attribution weights
- Time decay of sample weights
import numpy as np
import matplotlib.pyplot as plt
import pymlfinance
%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 6)
plt.rcParams['figure.dpi'] = 150
plt.rcParams['font.size'] = 15
plt.rcParams['axes.titlesize'] = 18
plt.rcParams['axes.labelsize'] = 15
plt.rcParams['xtick.labelsize'] = 13
plt.rcParams['ytick.labelsize'] = 13
plt.rcParams['legend.fontsize'] = 13
Generate Synthetic Overlapping Events¶
We create 100 events distributed across 500 bars. Each event has a random duration of 5 to 29 bars, producing substantial overlap between events. Such overlap is typical in financial applications, where holding periods routinely intersect.
np.random.seed(42)
n_bars = 500
n_events = 100
# Create events with varying overlap
entries = sorted(np.random.choice(n_bars - 20, n_events, replace=False))
durations = np.random.randint(5, 30, n_events)
events = [(int(e), min(int(e + d), n_bars - 1)) for e, d in zip(entries, durations)]
print(f"Generated {n_events} events across {n_bars} bars")
print(f" Average duration: {np.mean(durations):.1f} bars")
Generated 100 events across 500 bars
  Average duration: 16.4 bars
Concurrent Events¶
For each bar, we count how many events are active (i.e., the bar falls between the event's entry and exit). High concurrency means many events share the same bars, reducing the effective information content of each sample.
co_events = pymlfinance.sampling.num_co_events(events, n_bars)
print(f"--- Concurrent Events ---")
print(f" Max concurrent: {max(co_events)}")
print(f" Mean concurrent: {np.mean(co_events):.2f}")
print(f" Bars with no events: {sum(1 for c in co_events if c == 0)}")
--- Concurrent Events ---
  Max concurrent: 9
  Mean concurrent: 3.48
  Bars with no events: 7
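The counting logic is easy to reproduce without the library. The sketch below assumes, as the events list built above does, that each event is an (entry, exit) pair with an inclusive exit bar; the internals of num_co_events may differ in detail. Toy variable names (toy_events, toy_counts) are used to avoid clobbering the notebook's state.

```python
import numpy as np

def count_concurrent(events, n_bars):
    """Count how many events are active at each bar.
    Each event is an (entry, exit) pair; the exit bar is inclusive."""
    counts = np.zeros(n_bars, dtype=int)
    for entry, exit_ in events:
        counts[entry:exit_ + 1] += 1  # event spans [entry, exit] inclusive
    return counts

# Three events on a 10-bar timeline; bars 3-4 are shared by two events.
toy_events = [(0, 4), (3, 6), (8, 9)]
toy_counts = count_concurrent(toy_events, 10)
print(toy_counts.tolist())  # [1, 1, 1, 2, 2, 1, 1, 0, 1, 1]
```

Note that bar 7 has zero concurrency: no event spans it, which is exactly the "bars with no events" case counted above.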
# Concurrent events line plot
fig, ax = plt.subplots(figsize=(14, 5))
ax.fill_between(range(n_bars), co_events, alpha=0.4, color='steelblue')
ax.plot(co_events, color='steelblue', linewidth=0.8)
ax.axhline(y=np.mean(co_events), color='red', linestyle='--', linewidth=1,
           label=f'Mean = {np.mean(co_events):.2f}')
ax.set_xlabel('Bar Index')
ax.set_ylabel('Number of Concurrent Events')
ax.set_title('Concurrent Events Over Time')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Average Uniqueness¶
Average uniqueness measures how "informative" each event is. An event that overlaps with many others has low uniqueness (close to 0), while a non-overlapping event has uniqueness = 1.0. This metric is used to weight samples appropriately.
uniqueness = pymlfinance.sampling.average_uniqueness(events, n_bars)
print(f"--- Average Uniqueness ---")
print(f" Mean uniqueness: {np.mean(uniqueness):.4f}")
print(f" Min uniqueness: {np.min(uniqueness):.4f}")
print(f" Max uniqueness: {np.max(uniqueness):.4f}")
print(f" (1.0 = fully unique, lower = more overlap)")
--- Average Uniqueness ---
  Mean uniqueness: 0.2762
  Min uniqueness: 0.1181
  Max uniqueness: 0.6465
  (1.0 = fully unique, lower = more overlap)
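The standard AFML definition is: the uniqueness of event i at bar t is 1/c_t (the reciprocal of concurrency), and its average uniqueness is the mean of 1/c_t over the bars it spans. A minimal stand-alone sketch under that definition (average_uniqueness in pymlfinance may differ in detail):

```python
import numpy as np

def avg_uniqueness(events, n_bars):
    """Average uniqueness per event: mean of 1/concurrency over its span."""
    counts = np.zeros(n_bars)
    for entry, exit_ in events:
        counts[entry:exit_ + 1] += 1
    return np.array([
        np.mean(1.0 / counts[entry:exit_ + 1])
        for entry, exit_ in events
    ])

toy_events = [(0, 4), (3, 6), (8, 9)]
u = avg_uniqueness(toy_events, 10)
print(np.round(u, 3))  # event 2 never overlaps, so its uniqueness is 1.0
```

For the first toy event, three of its five bars have concurrency 1 and two have concurrency 2, giving (3*1 + 2*0.5)/5 = 0.8.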
# Uniqueness distribution histogram
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(uniqueness, bins=20, color='steelblue', edgecolor='black', linewidth=0.5, alpha=0.8)
ax.axvline(x=np.mean(uniqueness), color='red', linestyle='--', linewidth=1.5,
           label=f'Mean = {np.mean(uniqueness):.4f}')
ax.set_xlabel('Average Uniqueness')
ax.set_ylabel('Count')
ax.set_title('Distribution of Event Uniqueness')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Indicator Matrix¶
The indicator matrix is a (bars x events) binary matrix where entry (t, i) is 1
if event i is active at bar t. This matrix is the foundation for computing
uniqueness and running the sequential bootstrap.
ind_matrix = pymlfinance.sampling.get_indicator_matrix(events, n_bars)
print(f"--- Indicator Matrix ---")
print(f" Shape: {ind_matrix.shape} (bars x events)")
print(f" Non-zero entries: {np.sum(ind_matrix > 0)}")
print(f" Sparsity: {1 - np.sum(ind_matrix > 0) / ind_matrix.size:.2%}")
Sequential Bootstrap vs Standard Bootstrap¶
Standard bootstrap draws samples uniformly at random, ignoring temporal overlap. The sequential bootstrap draws each sample proportional to its average uniqueness given the samples already drawn, reducing redundancy.
Sequential bootstrap should yield higher average uniqueness in the drawn samples.
seq_samples = pymlfinance.sampling.seq_bootstrap(ind_matrix, num_samples=n_events, seed=42)
std_samples = pymlfinance.sampling.standard_bootstrap(n_events, n_events, seed=42)
print(f"--- Bootstrap Comparison ---")
print(f" Sequential bootstrap unique samples: {len(set(seq_samples))}/{n_events}")
print(f" Standard bootstrap unique samples: {len(set(std_samples))}/{n_events}")
# Monte Carlo comparison
comparison = pymlfinance.sampling.compare_bootstraps(ind_matrix, n_events, num_trials=50, seed=42)
print(f" Seq. avg uniqueness: {comparison.seq_uniqueness:.4f}")
print(f" Std. avg uniqueness: {comparison.std_uniqueness:.4f}")
--- Bootstrap Comparison ---
  Sequential bootstrap unique samples: 69/100
  Standard bootstrap unique samples: 65/100
  Seq. avg uniqueness: 0.2707
  Std. avg uniqueness: 0.2598
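The sequential bootstrap can be sketched in plain NumPy: at each draw, the probability of picking event i is proportional to the average uniqueness i would have if it were added to the sample drawn so far. This is a deliberately naive O(draws x events x bars) version of the AFML algorithm; the library's seq_bootstrap is presumably optimized and may differ in implementation details.

```python
import numpy as np

def seq_bootstrap_sketch(ind, num_samples, seed=0):
    """Draw events with probability proportional to their conditional
    average uniqueness, given the events already drawn."""
    rng = np.random.default_rng(seed)
    n_events = ind.shape[1]
    drawn = []
    for _ in range(num_samples):
        avg_uniq = np.zeros(n_events)
        for i in range(n_events):
            trial = ind[:, drawn + [i]]      # sample so far plus candidate i
            conc = trial.sum(axis=1)         # concurrency within the trial sample
            active = ind[:, i] == 1          # bars where candidate i is active
            avg_uniq[i] = (ind[active, i] / conc[active]).mean()
        drawn.append(rng.choice(n_events, p=avg_uniq / avg_uniq.sum()))
    return drawn

toy_events = [(0, 4), (3, 6), (8, 9)]
toy_ind = np.zeros((10, 3), dtype=int)
for i, (a, b) in enumerate(toy_events):
    toy_ind[a:b + 1, i] = 1
sample = seq_bootstrap_sketch(toy_ind, num_samples=3, seed=42)
print(sample)
```

Because an already-drawn event depresses the conditional uniqueness of everything it overlaps, repeated draws of heavily overlapping events become less likely, which is why the sequential bootstrap ends up with more unique samples and higher average uniqueness above.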
# Sequential vs standard bootstrap comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
# Unique sample counts
methods = ['Sequential', 'Standard']
unique_counts = [len(set(seq_samples)), len(set(std_samples))]
colors = ['#55A868', '#C44E52']
bars = ax1.bar(methods, unique_counts, color=colors, edgecolor='black', linewidth=0.5)
for bar, count in zip(bars, unique_counts):
    ax1.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.5,
             str(count), ha='center', va='bottom', fontweight='bold')
ax1.set_ylabel('Unique Samples')
ax1.set_title(f'Unique Samples Drawn (out of {n_events})')
ax1.set_ylim(0, n_events * 1.1)
ax1.axhline(y=n_events, color='gray', linestyle='--', alpha=0.5, label='Total events')
ax1.legend()
# Average uniqueness comparison
avg_uniq = [comparison.seq_uniqueness, comparison.std_uniqueness]
bars2 = ax2.bar(methods, avg_uniq, color=colors, edgecolor='black', linewidth=0.5)
for bar, val in zip(bars2, avg_uniq):
    ax2.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.005,
             f'{val:.4f}', ha='center', va='bottom', fontweight='bold')
ax2.set_ylabel('Average Uniqueness')
ax2.set_title('Average Uniqueness (50 Monte Carlo trials)')
ax2.set_ylim(0, 1.0)
plt.tight_layout()
plt.show()
Return Attribution Weights¶
Return attribution distributes each event's return across the bars it spans, weighting each bar by the inverse of its concurrency. Events that overlap less therefore receive a larger share of the attribution.
# returns must be per-event (same length as events list)
event_returns = np.random.randn(n_events) * 0.01 # per-event returns
attr_weights = pymlfinance.sampling.return_attribution_weights(events, event_returns, n_bars)
print(f"--- Return Attribution Weights ---")
print(f" Mean weight: {np.mean(attr_weights):.6f}")
print(f" Std weight: {np.std(attr_weights):.6f}")
print(f" Min weight: {np.min(attr_weights):.6f}")
print(f" Max weight: {np.max(attr_weights):.6f}")
--- Return Attribution Weights ---
  Mean weight: 1.000000
  Std weight: 0.855557
  Min weight: 0.007111
  Max weight: 4.349096
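The library call above takes per-event returns; the textbook (AFML) formulation starts from per-bar returns instead: weight_i = |sum over the event's span of r_t / c_t|, rescaled so the weights average to 1 (consistent with the mean of 1.0 printed above). A stand-alone sketch of that construction, using hypothetical toy bar returns:

```python
import numpy as np

def attribution_weights(events, bar_returns, n_bars):
    """AFML-style attribution: |sum of r_t / c_t over the event's span|,
    normalized so the weights have mean 1."""
    counts = np.zeros(n_bars)
    for entry, exit_ in events:
        counts[entry:exit_ + 1] += 1
    w = np.array([
        abs(np.sum(bar_returns[entry:exit_ + 1] / counts[entry:exit_ + 1]))
        for entry, exit_ in events
    ])
    return w * len(events) / w.sum()  # normalize: mean weight = 1

rng = np.random.default_rng(0)
toy_events = [(0, 4), (3, 6), (8, 9)]
toy_bar_returns = rng.normal(0, 0.01, 10)
w = attribution_weights(toy_events, toy_bar_returns, 10)
print(np.round(w, 3), w.mean())
```

Dividing each bar's return by its concurrency means overlapping events split the credit for shared bars, so a non-overlapping event keeps its full return as weight.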
Time Decay¶
Time decay applies a linear decay to sample weights, giving more importance to recent
observations. The oldest_weight parameter controls the decay:
- oldest_weight=0.0 -- full decay (oldest sample has zero weight)
- oldest_weight=0.5 -- half decay
- oldest_weight=1.0 -- no decay (uniform weights)
decayed_full = pymlfinance.sampling.time_decay(attr_weights, oldest_weight=0.0)
decayed_half = pymlfinance.sampling.time_decay(attr_weights, oldest_weight=0.5)
decayed_none = pymlfinance.sampling.time_decay(attr_weights, oldest_weight=1.0)
print(f"--- Time Decay ---")
print(f" Full decay (oldest=0.0): first={decayed_full[0]:.4f}, last={decayed_full[-1]:.4f}")
print(f" Half decay (oldest=0.5): first={decayed_half[0]:.4f}, last={decayed_half[-1]:.4f}")
print(f" No decay (oldest=1.0): first={decayed_none[0]:.4f}, last={decayed_none[-1]:.4f}")
--- Time Decay ---
  Full decay (oldest=0.0): first=0.0000, last=1.5025
  Half decay (oldest=0.5): first=2.1745, last=1.5025
  No decay (oldest=1.0): first=4.3491, last=1.5025
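A linear decay consistent with the numbers printed above can be sketched as a multiplicative ramp from oldest_weight (oldest event) up to 1.0 (most recent event). Note this simplified form decays over the event index, whereas AFML's version decays over cumulative uniqueness; treat it as an assumption about what time_decay computes, not a reimplementation.

```python
import numpy as np

def linear_time_decay(weights, oldest_weight=1.0):
    """Scale weights by a linear factor ramping from oldest_weight
    (first/oldest sample) to 1.0 (most recent sample)."""
    factors = np.linspace(oldest_weight, 1.0, len(weights))
    return np.asarray(weights) * factors

w = np.ones(5)
print(linear_time_decay(w, oldest_weight=0.0))  # ramps linearly from 0 to 1
print(linear_time_decay(w, oldest_weight=1.0))  # unchanged: no decay
```

With oldest_weight=1.0 the factors are all 1, which matches the "no decay" row above where the first and last weights equal the raw attribution weights.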
# Time decay comparison plot
fig, ax = plt.subplots(figsize=(12, 5))
x = np.arange(len(attr_weights))
ax.plot(x, decayed_full, label='Full decay (oldest=0.0)', color='#C44E52', linewidth=1.2)
ax.plot(x, decayed_half, label='Half decay (oldest=0.5)', color='#DD8452', linewidth=1.2)
ax.plot(x, decayed_none, label='No decay (oldest=1.0)', color='#55A868', linewidth=1.2)
ax.set_xlabel('Event Index')
ax.set_ylabel('Weight')
ax.set_title('Time Decay Applied to Attribution Weights')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Exercises¶
- Overlap sensitivity: Increase event overlap (use longer durations, e.g., np.random.randint(20, 60, n_events)) and observe how uniqueness drops.
- Bootstrap sample size: Compare sequential vs standard bootstrap with different numbers of samples (50, 100, 200). Does the uniqueness gap widen?
- Time decay curves: Try different time decay settings and see how the weight distributions shift. Plot the cumulative weight distribution for each decay level.