NLP Toolkit

Advanced NLP with perplexity scoring, burstiness analysis, and entropy calculation

Safety Notice

This listing is from the official public ClawHub registry. Review SKILL.md and referenced scripts before running.

Copy the command below and send it to your AI assistant to install this skill.

Install skill "NLP Toolkit" with this command: npx skills add raghulpasupathi/nlp-toolkit

NLP Toolkit

Advanced NLP analysis for AI content detection using statistical measures.

Implementation

/**
 * Analyze text using NLP metrics
 * @param {string} text - Text to analyze
 * @param {object} options - Configuration options
 * @returns {object} NLP analysis results
 */
async function analyzeText(text, options = {}) {
  const {
    perplexityThreshold = 45.0,
    burstinessThreshold = 0.35,
    minTextLength = 50
  } = options;

  if (text.length < minTextLength) {
    return {
      error: 'Text too short for analysis',
      minLength: minTextLength
    };
  }

  // Calculate perplexity (simplified)
  const perplexity = calculatePerplexity(text);

  // Calculate burstiness
  const burstiness = calculateBurstiness(text);

  // Calculate entropy
  const entropy = calculateEntropy(text);

  // Token distribution analysis
  const tokenStats = analyzeTokenDistribution(text);

  // Determine if AI-generated
  const isAI = perplexity < perplexityThreshold && burstiness < burstinessThreshold;
  const confidence = calculateConfidence(perplexity, burstiness, entropy);

  return {
    isAI,
    confidence: Math.round(confidence * 100),
    metrics: {
      perplexity: Math.round(perplexity * 100) / 100,
      burstiness: Math.round(burstiness * 100) / 100,
      entropy: Math.round(entropy * 100) / 100
    },
    tokenStats,
    thresholds: {
      perplexity: perplexityThreshold,
      burstiness: burstinessThreshold
    },
    explanation: isAI ? 
      'Low perplexity and uniform burstiness suggest AI generation' :
      'Natural variation in metrics suggests human writing'
  };
}

/**
 * Calculate perplexity score (simplified)
 */
function calculatePerplexity(text) {
  const words = text.toLowerCase().trim().split(/\s+/).filter(Boolean);
  const uniqueWords = new Set(words);
  
  // Simplified proxy: lexical diversity (unique words / total words),
  // scaled to 0-100. Real perplexity requires a language model; here,
  // repetitive text scores low, mirroring the "predictable text" signal.
  // (Dividing by the ratio instead would always yield >= 100, since the
  // ratio is at most 1, and the cap below would flatten every score.)
  const ratio = uniqueWords.size / words.length;
  const perplexity = 100 * ratio;
  
  return Math.min(perplexity, 100);
}

/**
 * Calculate burstiness (variation in sentence length)
 */
function calculateBurstiness(text) {
  const sentences = text.split(/[.!?]+/).filter(s => s.trim());
  if (sentences.length < 2) return 0;

  // Trim each sentence so a leading space doesn't add an empty token
  const lengths = sentences.map(s => s.trim().split(/\s+/).length);
  const avg = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const variance = lengths.reduce((sum, len) => sum + Math.pow(len - avg, 2), 0) / lengths.length;
  const stdDev = Math.sqrt(variance);

  // Burstiness: coefficient of variation
  const burstiness = stdDev / avg;

  return Math.min(burstiness, 1.0);
}

/**
 * Calculate Shannon entropy
 */
function calculateEntropy(text) {
  const chars = text.toLowerCase().split('');
  const freq = {};

  // Count character frequencies
  for (const char of chars) {
    freq[char] = (freq[char] || 0) + 1;
  }

  // Calculate entropy
  let entropy = 0;
  const total = chars.length;

  for (const count of Object.values(freq)) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }

  return entropy;
}

/**
 * Analyze token distribution
 */
function analyzeTokenDistribution(text) {
  const words = text.toLowerCase().trim().split(/\s+/).filter(Boolean);
  const uniqueWords = new Set(words);

  return {
    totalWords: words.length,
    uniqueWords: uniqueWords.size,
    vocabularyRichness: Math.round((uniqueWords.size / words.length) * 100) / 100
  };
}

/**
 * Calculate overall confidence
 */
function calculateConfidence(perplexity, burstiness, entropy) {
  // Lower perplexity = more AI-like
  const perplexityScore = Math.max(0, 1 - (perplexity / 100));
  
  // Lower burstiness = more AI-like
  const burstinessScore = Math.max(0, 1 - (burstiness / 0.5));
  
  // Moderate entropy expected for AI
  const entropyScore = (entropy > 3.5 && entropy < 5.0) ? 0.8 : 0.4;

  const confidence = (perplexityScore + burstinessScore + entropyScore) / 3;
  return Math.min(confidence, 1.0);
}

// Export for OpenClaw
module.exports = {
  analyzeText,
  calculatePerplexity,
  calculateBurstiness,
  calculateEntropy
};
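The Shannon entropy helper can be sanity-checked in isolation. This standalone sketch (the same character-frequency logic as above, copied out so it runs without the skill module) exercises the two boundary cases: a single repeated symbol carries zero bits, and two equally likely symbols carry exactly one bit per character.

```javascript
// Standalone copy of calculateEntropy for a quick sanity check.
function calculateEntropy(text) {
  const chars = text.toLowerCase().split('');
  const freq = {};

  // Count character frequencies
  for (const char of chars) {
    freq[char] = (freq[char] || 0) + 1;
  }

  // Sum -p * log2(p) over the character distribution
  let entropy = 0;
  for (const count of Object.values(freq)) {
    const p = count / chars.length;
    entropy -= p * Math.log2(p);
  }
  return entropy;
}

console.log(calculateEntropy('aaaa')); // 0: one symbol, fully predictable
console.log(calculateEntropy('ab'));   // 1: two equally likely symbols, one bit
```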

Usage

const result = await skills.nlpToolkit.analyzeText(text, {
  perplexityThreshold: 45.0,
  burstinessThreshold: 0.35
});

console.log(`AI Detection: ${result.isAI} (${result.confidence}% confidence)`);
console.log(`Perplexity: ${result.metrics.perplexity}`);
console.log(`Burstiness: ${result.metrics.burstiness}`);
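To see why burstiness separates the two classes, here is a minimal standalone sketch of the same coefficient-of-variation calculation used by the skill (the sample texts are invented for illustration): uniform sentence lengths score 0, while strongly varied lengths push the score up to the cap of 1.0.

```javascript
// Standalone copy of calculateBurstiness: coefficient of variation
// of sentence lengths, capped at 1.0.
function calculateBurstiness(text) {
  const sentences = text.split(/[.!?]+/).filter(s => s.trim());
  if (sentences.length < 2) return 0;

  const lengths = sentences.map(s => s.trim().split(/\s+/).length);
  const avg = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const variance =
    lengths.reduce((sum, len) => sum + (len - avg) ** 2, 0) / lengths.length;

  return Math.min(Math.sqrt(variance) / avg, 1.0);
}

// Every sentence is exactly 3 words: no variation at all.
const uniform = 'One two three. One two three. One two three.';
// Sentence lengths 1, 10, 1: high variation.
const varied = 'Hi. This sentence has quite a few more words in it. Okay.';

console.log(calculateBurstiness(uniform)); // 0
console.log(calculateBurstiness(varied));  // 1 (capped)
```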

Configuration

{
  "perplexityThreshold": 45.0,
  "burstinessThreshold": 0.35,
  "minTextLength": 50
}
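These thresholds combine with the metrics as a simple AND rule: both perplexity and burstiness must fall below their thresholds for text to be flagged as AI. The hypothetical helper below mirrors that rule from `analyzeText` (the metric values are invented for illustration):

```javascript
// Hypothetical helper mirroring analyzeText's decision rule:
// flag only when BOTH metrics sit below their thresholds.
function flagsAsAI(metrics, config) {
  return metrics.perplexity < config.perplexityThreshold &&
         metrics.burstiness < config.burstinessThreshold;
}

const config = { perplexityThreshold: 45.0, burstinessThreshold: 0.35 };

console.log(flagsAsAI({ perplexity: 30.2, burstiness: 0.21 }, config)); // true
console.log(flagsAsAI({ perplexity: 30.2, burstiness: 0.48 }, config)); // false: bursty enough to read as human
```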

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

意图分类器 (Intent Classifier)

An original skill that analyzes user input in real time, identifies the underlying intent, and routes it to the matching handler. Recognizes input types such as code, questions, requests, and small talk; suited to AI-assistant task dispatch, smart routing, and dialogue understanding.

General

熵管理系统 (Entropy Management System)

An original skill for managing and controlling entropy (disorder) in AI sessions, including context pruning, state resets, and attention focusing. Suited to long-running sessions, complex tasks, and multi-step workflows.

Automation

智能文档处理Skill (Intelligent Document Processing Skill)

Built on DeepSeek v4, it supports intelligent parsing, information extraction, content analysis, and format conversion for PDF, Word, Excel, and other document formats, with a claimed accuracy of 99%.

Research

PoliBERT Sentiment Analysis

Political sentiment analysis using PoliBERTweet - a RoBERTa model pre-trained on 83M political tweets. Analyzes support, opposition, and stance toward politi...
