geographic-clustering

Geographic Clustering

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy this and send it to your AI assistant to learn

Install skill "geographic-clustering" with this command: npx skills add dadbodgeoff/drift/dadbodgeoff-drift-geographic-clustering

Geographic Clustering

Grid-based clustering with medoid finding and risk scoring from event density.

When to Use This Skill

  • Clustering thousands of geo-located events for visualization

  • Need O(n) performance instead of O(n²) distance-based clustering

  • Want actual data points as map markers (medoids, not synthetic centroids)

  • Calculating risk scores from cluster characteristics

Core Concepts

Grid-based clustering is much faster than distance-based algorithms:

  • O(n) grouping by grid cell vs O(n²) for distance-based

  • Medoid finding gives actual data point as center (better for map markers)

  • Multi-factor risk scoring combines density, sentiment, and recency

Implementation

TypeScript

interface ProcessedEvent { id: string; lat?: number; lon?: number; sentiment: number; seenDate: Date; sourceCountry: string; }

interface Cluster { id: string; events: ProcessedEvent[]; centroidLat: number; centroidLon: number; countryCode: string; }

interface RiskPrediction { id: string; lat: number; lon: number; riskScore: number; sentiment: number; eventCount: number; topSignals: string[]; summary: string; countryCode: string; }

// Grid cell size in degrees (~100km at equator) const GRID_SIZE = 1.0;

/**

  • Generate grid cell key for coordinates */ function getGridKey(lat: number, lon: number): string { const gridLat = Math.floor(lat / GRID_SIZE) * GRID_SIZE; const gridLon = Math.floor(lon / GRID_SIZE) * GRID_SIZE; return ${gridLat},${gridLon}; }

/**

  • Cluster events by geographic grid - O(n) complexity */ function clusterEvents( events: ProcessedEvent[], minClusterSize: number = 3 ): Cluster[] { const grid = new Map<string, ProcessedEvent[]>();

// O(n) grouping by grid cell for (const event of events) { if (event.lat === undefined || event.lon === undefined) continue;

const key = getGridKey(event.lat, event.lon);
if (!grid.has(key)) {
  grid.set(key, []);
}
grid.get(key)!.push(event);

}

// Convert grid cells to clusters const clusters: Cluster[] = []; let clusterId = 1;

for (const [key, clusterEvents] of grid) { if (clusterEvents.length < minClusterSize) continue;

const centroidLat = clusterEvents.reduce((sum, e) => sum + (e.lat || 0), 0) 
                    / clusterEvents.length;
const centroidLon = clusterEvents.reduce((sum, e) => sum + (e.lon || 0), 0) 
                    / clusterEvents.length;

clusters.push({
  id: `cluster-${clusterId++}`,
  events: clusterEvents,
  centroidLat,
  centroidLon,
  countryCode: getDominantCountry(clusterEvents),
});

}

return clusters; }

/**

  • Find the medoid (most central actual point) in a cluster */ function findMedoid(events: ProcessedEvent[]): ProcessedEvent { if (events.length === 0) throw new Error('Cannot find medoid of empty cluster'); if (events.length === 1) return events[0];

const centroidLat = events.reduce((sum, e) => sum + (e.lat || 0), 0) / events.length; const centroidLon = events.reduce((sum, e) => sum + (e.lon || 0), 0) / events.length;

let medoid = events[0]; let minDist = Infinity;

for (const event of events) { const dist = Math.hypot( (event.lat || 0) - centroidLat, (event.lon || 0) - centroidLon ); if (dist < minDist) { minDist = dist; medoid = event; } }

return medoid; }

/**

  • Get most common country code in cluster */ function getDominantCountry(events: ProcessedEvent[]): string { const counts: Record<string, number> = {};

for (const event of events) { const country = event.sourceCountry || 'XX'; counts[country] = (counts[country] || 0) + 1; }

let maxCount = 0; let dominant = 'XX';

for (const [country, count] of Object.entries(counts)) { if (count > maxCount) { maxCount = count; dominant = country; } }

return dominant; }

/**

  • Calculate risk score for a cluster (0-100)
  • Multi-factor: density + sentiment + recency */ function calculateRiskScore(cluster: Cluster): number { const eventCount = cluster.events.length; const avgSentiment = cluster.events.reduce((sum, e) => sum + e.sentiment, 0) / eventCount;

// Factor 1: Event density (0-40 points) const eventFactor = Math.min(eventCount / 300, 1) * 40;

// Factor 2: Sentiment (0-30 points) const sentimentFactor = Math.abs(Math.min(avgSentiment, 0)) * 30;

// Factor 3: Recency (0-30 points) const now = Date.now(); const avgAge = cluster.events.reduce((sum, e) => { return sum + (now - e.seenDate.getTime()); }, 0) / eventCount; const hoursOld = avgAge / (1000 * 60 * 60); const recencyFactor = Math.max(0, 30 - hoursOld);

const total = eventFactor + sentimentFactor + recencyFactor; return Math.round(Math.min(100, Math.max(0, total))); }

/**

  • Convert clusters to risk predictions for API/UI */ function clustersToPredictions(clusters: Cluster[]): RiskPrediction[] { const predictions: RiskPrediction[] = [];

for (const cluster of clusters) { const medoid = findMedoid(cluster.events); const riskScore = calculateRiskScore(cluster); const avgSentiment = cluster.events.reduce((sum, e) => sum + e.sentiment, 0) / cluster.events.length;

const riskLevel = riskScore >= 80 ? 'Critical' 
                : riskScore >= 60 ? 'High' 
                : riskScore >= 40 ? 'Moderate' 
                : 'Low';

predictions.push({
  id: cluster.id,
  lat: medoid.lat || cluster.centroidLat,
  lon: medoid.lon || cluster.centroidLon,
  riskScore,
  sentiment: avgSentiment,
  eventCount: cluster.events.length,
  topSignals: generateSignals(cluster, riskScore),
  summary: `${riskLevel} risk in ${cluster.countryCode}: ${cluster.events.length} events`,
  countryCode: cluster.countryCode,
});

}

return predictions.sort((a, b) => b.riskScore - a.riskScore); }

function generateSignals(cluster: Cluster, riskScore: number): string[] { const signals: string[] = []; const avgSentiment = cluster.events.reduce((sum, e) => sum + e.sentiment, 0) / cluster.events.length;

signals.push(${cluster.events.length} events);

if (avgSentiment < -0.5) signals.push('Very negative tone'); else if (avgSentiment < -0.2) signals.push('Negative tone');

if (riskScore >= 80) signals.push('Critical risk'); else if (riskScore >= 60) signals.push('High risk');

return signals.slice(0, 3); }

Usage Examples

Basic Clustering Pipeline

async function processEvents() { const events = await fetchEvents();

// Cluster geographically (min 3 events per cluster) const clusters = clusterEvents(events, 3);

// Convert to predictions for map const predictions = clustersToPredictions(clusters);

// Return top 50 for visualization return predictions.slice(0, 50); }

Best Practices

  • Grid-based O(n) - Much faster than distance-based for large datasets

  • Medoid vs Centroid - Use actual data point for map markers (won't be in ocean)

  • Multi-factor risk - Combine density, sentiment, and recency

  • Minimum cluster size - Filter noise with threshold (3+ events)

  • Configurable parameters - Make grid size and thresholds adjustable

Common Mistakes

  • Distance-based clustering on large datasets (O(n²) is too slow)

  • Synthetic centroids for map markers (may be in invalid locations)

  • Single-factor risk scoring (misses nuance)

  • No minimum cluster size (noise becomes clusters)

  • Hardcoded risk thresholds (should be configurable)

Related Patterns

  • deduplication - Deduplicate events before clustering

  • batch-processing - Process clusters efficiently

  • snapshot-aggregation - Aggregate cluster data for dashboards

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

oauth-social-login

No summary provided by upstream source.

Repository SourceNeeds Review
General

sse-streaming

No summary provided by upstream source.

Repository SourceNeeds Review
General

multi-tenancy

No summary provided by upstream source.

Repository SourceNeeds Review