Nicholai 68cec8090b Fix critical bugs: settings loading, UUID generation, and chunk metadata
This commit resolves several critical issues that prevented the plugin from
working correctly with Qdrant and adds essential metadata to indexed chunks.

**Settings & Configuration:**
- Fix settings initialization using deep merge instead of shallow Object.assign
  - Prevents nested settings from being lost during load
  - Ensures all default values are properly preserved
- Add orchestrator reinitialization when settings are saved
  - Ensures QdrantClient and embedding providers use updated settings
  - Fixes issue where plugin used localhost instead of saved HTTPS URL

**UUID Generation:**
- Fix generateDeterministicUUID() creating invalid UUIDs
  - Was generating 35-character UUIDs instead of proper 36-character format
  - Now correctly creates valid UUID v4 format: xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx
  - Properly generates segment 5 (12 hex chars) from combined hash data
  - Fixes segment 4 to start with 8/9/a/b per UUID spec
  - Resolves Qdrant API rejections: "value X is not a valid point ID"

**Chunk Metadata:**
- Add chunk_text field to ChunkMetadata type
  - Stores the actual text content of each chunk in Qdrant payload
  - Essential for displaying search results and content preview
- Add model name to chunk metadata
  - Populates model field with embedding provider name (e.g., "nomic-embed-text")
  - Enables tracking which model generated each embedding
  - Supports future multi-model collections

**Debug Logging:**
- Add logging for settings loading and URL tracking
- Add logging for QdrantClient initialization
- Add logging for orchestrator creation with settings

**Documentation:**
- Add CLAUDE.md with comprehensive architecture documentation
  - Build commands and development workflow
  - Core components and data processing pipeline
  - Important implementation details and debugging guide

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-23 11:29:48 -06:00

70 lines
2.3 KiB
TypeScript

/**
 * Build a random identifier in the UUID v4 text layout
 * (`xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx`).
 *
 * Every `x` placeholder becomes a random hex digit; the single `y`
 * placeholder becomes one of `8`, `9`, `a` or `b`. The dashes and the
 * literal `4` are copied through unchanged.
 *
 * Randomness comes from `Math.random()`, so the result is not suitable
 * where unpredictability matters.
 *
 * @returns A 36-character lowercase UUID string.
 */
export function generateUUID(): string {
  const template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx';
  let result = '';

  for (const placeholder of template) {
    if (placeholder !== 'x' && placeholder !== 'y') {
      // Dashes and the fixed version digit pass straight through.
      result += placeholder;
      continue;
    }

    // One random nibble (0..15) per placeholder.
    const nibble = (Math.random() * 16) | 0;
    // For 'y', force the top two bits to binary 10, leaving 8..b.
    const digit = placeholder === 'x' ? nibble : (nibble & 0x3) | 0x8;
    result += digit.toString(16);
  }

  return result;
}
/**
 * Derive a UUID-shaped identifier from a string so that the same input
 * always yields the same 36-character result.
 *
 * Three chained 32-bit rolling hashes are computed over the UTF-16 code
 * units of `input`; each is rendered as the 8-digit hex of its absolute
 * value and then laid out as:
 *
 *   segment 1: all 8 digits of the first hash
 *   segment 2: the first 4 digits of the first hash (again)
 *   segment 3: '4' followed by digits 5-7 of the first hash
 *   segment 4: the first 4 digits of the second hash, with the leading
 *              digit forced into 8..b
 *   segment 5: the third hash followed by the first 4 digits of the
 *              first hash
 *
 * NOTE(review): segments 1-3 all come from the first hash, and the tail of
 * segment 5 repeats it, so the identifier carries far fewer than 128 bits
 * of independent data. Changing the derivation would change every
 * generated ID, so it is deliberately left as is.
 *
 * @param input - Text to derive the identifier from; may be empty.
 * @returns A lowercase string shaped like a version-4 UUID.
 */
export function generateDeterministicUUID(input: string): string {
  // Work on UTF-16 code units, exactly as charCodeAt() reports them.
  const codeUnits = Array.from({ length: input.length }, (_, index) => input.charCodeAt(index));

  // Negative hashes are folded to their magnitude before hex conversion.
  const toHex8 = (value: number): string => Math.abs(value).toString(16).padStart(8, '0');

  // First hash: h * 31 + code, wrapped to a signed 32-bit integer each step.
  const firstHash = codeUnits.reduce((acc, code) => ((acc << 5) - acc + code) | 0, 0);

  // Second hash: seeded with the first, shifting by 3 bits per code unit.
  const secondHash = codeUnits.reduce((acc, code) => ((acc << 3) + code) | 0, firstHash);

  // Third hash: seeded with the second, walking the input back to front
  // and shifting by 7 bits per code unit.
  const thirdHash = codeUnits.reduceRight((acc, code) => ((acc << 7) + code) | 0, secondHash);

  const firstHex = toHex8(firstHash);
  const secondHex = toHex8(secondHash);
  const thirdHex = toHex8(thirdHash);

  // Variant digit: keep the low two bits of the source digit, set the top
  // two bits to binary 10 so the result is one of 8, 9, a, b.
  const variantDigit = ((parseInt(secondHex.charAt(0), 16) & 0x3) | 0x8).toString(16);

  const segments = [
    firstHex.slice(0, 8),
    firstHex.slice(0, 4),
    '4' + firstHex.slice(4, 7),
    variantDigit + secondHex.slice(1, 4),
    (thirdHex + firstHex).slice(0, 12),
  ];

  return segments.join('-');
}
/**
 * Reduce a string to a non-negative numeric hash.
 *
 * Walks the UTF-16 code units of `str`, folding each one in as
 * `h = h * 31 + codeUnit` with the running value wrapped to a signed
 * 32-bit integer after every step. The sign is dropped at the end.
 *
 * @param str - Text to hash; an empty string hashes to 0.
 * @returns The absolute value of the final 32-bit hash.
 */
export function hashString(str: string): number {
  let accumulator = 0;
  let position = 0;

  while (position < str.length) {
    // Math.imul(31, h) is the 32-bit wrapped form of (h << 5) - h.
    accumulator = (Math.imul(31, accumulator) + str.charCodeAt(position)) | 0;
    position += 1;
  }

  return Math.abs(accumulator);
}