# From local path (recommended for FSS parsers)
npm install ../packages/bobai-frontmatter

# Or link globally
cd /MASTERFOLDER/Tools/parsers/packages/bobai-frontmatter
npm link
# Then in your parser:
npm link @bobai/frontmatter

Quick Start

import {
  FrontmatterGenerator,
  getEnrichmentPrompt,
  PARSER_PROFILES,
  LLMEnrichment
} from '@bobai/frontmatter';

// Generate markdown with frontmatter
const markdown = FrontmatterGenerator.generateMarkdown(
  {
    generator: 'fss-parse-pdf',
    version: '1.2.0',
    title: 'My Document',
    sourcePath: '/path/to/file.pdf',
    profile: PARSER_PROFILES['fss-parse-pdf']  // 'technical'
  },
  {
    word_count: 1234,
    page_count: 8,
    has_tables: true,
    has_images: false
  },
  content,        // Markdown content string
  undefined,      // LLMEnrichment or undefined
  'balanced'      // OutputMode
);

API Reference

FrontmatterGenerator

`generate(options, deterministic?, enrichment?, mode?)`

Generate frontmatter YAML block only.

const frontmatter = FrontmatterGenerator.generate(
  options: FrontmatterOptions,
  deterministic?: DeterministicFields,
  enrichment?: LLMEnrichment,
  mode?: OutputMode  // 'none' | 'balanced' | 'complete'
): string;

`generateMarkdown(options, deterministic, content, enrichment?, mode?)`

Generate complete markdown with frontmatter prepended.

const markdown = FrontmatterGenerator.generateMarkdown(
  options: FrontmatterOptions,
  deterministic: DeterministicFields,
  content: string,
  enrichment?: LLMEnrichment,
  mode?: OutputMode
): string;

Types

FrontmatterOptions

interface FrontmatterOptions {
  generator: string;           // e.g., 'fss-parse-pdf'
  version: string;             // e.g., '1.2.0'
  title: string;               // Document title
  sourcePath?: string | null;  // Original file path
  profile?: ProfileType;       // Document profile
  extractionConfidence?: number;  // 0.0-1.0
  contentQuality?: number;        // 0.0-2.0
}

DeterministicFields

Parser-extracted metadata. In addition to the common fields below, any parser-specific fields can be included:

interface DeterministicFields {
  word_count?: number;
  page_count?: number;
  character_count?: number;
  [key: string]: any;  // Parser-specific fields
}

LLMEnrichment

AI-generated metadata fields:

interface LLMEnrichment {
  summary?: string;
  tags?: string[];
  category?: string;
  audience?: 'all' | 'beginner' | 'intermediate' | 'expert';
  doc_purpose?: 'reference' | 'tutorial' | 'troubleshooting' | 'conceptual' | 'guide' | 'specification';
  complexity?: number;  // 1-5
  actionable?: boolean;
  key_technologies?: string[];
}

Output Modes

`none`

Returns an empty string (no frontmatter), so the output is the content only.

`balanced` (default)

Includes:

- Core required fields (profile, created, generator, version, title, etc.)
- Key deterministic fields from the BALANCED_FIELDS list
- LLM enrichment fields (or placeholders)

Best for RAG indexing and search.

`complete`

Includes all fields from the deterministic object, plus the core and enrichment fields. Use for archival or when full metadata is needed.

Parser Profiles

Default profiles for each parser type:

import { PARSER_PROFILES } from '@bobai/frontmatter';

PARSER_PROFILES['fss-parse-pdf']          // 'technical'
PARSER_PROFILES['fss-parse-word']         // 'technical'
PARSER_PROFILES['fss-parse-excel']        // 'data'
PARSER_PROFILES['fss-parse-image']        // 'data'
PARSER_PROFILES['fss-parse-audio']        // 'meeting'
PARSER_PROFILES['fss-parse-video']        // 'meeting'
PARSER_PROFILES['fss-parse-email']        // 'data'
PARSER_PROFILES['fss-parse-presentation'] // 'technical'
PARSER_PROFILES['fss-parse-data']         // 'data'
PARSER_PROFILES['fss-parse-diagram']      // 'schema'

Balanced Fields by Parser Type

The BALANCED_FIELDS list includes 70+ fields covering all parser types:

Universal

word_count, page_count, character_count, author, subject, creator, created, modified, file_size, format

PDF/Word Structure

has_tables, has_images, table_count, image_count, section_count, has_toc, has_forms, has_tracked_changes, paragraph_count, heading_count

Excel/Data

sheet_count, row_count, column_count, record_count, format_detected

Image

width, height, channels, has_alpha, color_space, ocr_confidence, has_exif

Audio

duration, duration_seconds, bitrate, sample_rate, codec, has_transcript, speaker_count, language

Video

fps, aspect_ratio, resolution, video_codec, audio_codec

Presentation

slide_count, total_slides, chart_count, has_speaker_notes, has_animations

Email

from, to, cc, sender, recipients, date, message_id, has_attachments, attachment_count, importance, thread_id

Diagram

diagram_count, diagram_type, valid_diagrams, invalid_diagrams, node_count, edge_count

LLM Enrichment

Getting the Prompt

import { getEnrichmentPrompt, getSamplePromptForDocType } from '@bobai/frontmatter';

// Get prompt for LLM enrichment
const prompt = getEnrichmentPrompt(content, 'pdf');

// Send to your LLM...
const response = await llm.generate(prompt);
const enrichment: LLMEnrichment = JSON.parse(response);

// Use in frontmatter generation
const markdown = FrontmatterGenerator.generateMarkdown(
  options,
  deterministic,
  content,
  enrichment,
  'balanced'
);

Prompt Output Format

The prompt asks the LLM to return JSON matching the LLMEnrichment interface (validate the response before use, since model output is not guaranteed to conform):

{
  "summary": "2-3 sentence description",
  "tags": ["specific", "search", "terms"],
  "category": "technical",
  "audience": "intermediate",
  "doc_purpose": "reference",
  "complexity": 3,
  "actionable": false,
  "key_technologies": ["TypeScript", "Node.js"]
}

Parser Integration Example

// In your parser (e.g., pdf-ts/src/pdf-parser.ts)
import {
  FrontmatterGenerator,
  PARSER_PROFILES,
  FrontmatterOptions,
  DeterministicFields
} from '@bobai/frontmatter';
import { version } from '../package.json';

export function generateOutput(
  content: string,
  metadata: ParsedMetadata,
  sourcePath: string,
  mode: 'none' | 'balanced' | 'complete' = 'balanced'
): string {
  const options: FrontmatterOptions = {
    generator: 'fss-parse-pdf',
    version,
    title: metadata.title || 'Untitled',
    sourcePath,
    profile: PARSER_PROFILES['fss-parse-pdf'],
    extractionConfidence: metadata.confidence,
    contentQuality: calculateQuality(metadata)
  };

  const deterministic: DeterministicFields = {
    word_count: metadata.wordCount,
    page_count: metadata.pageCount,
    character_count: metadata.characterCount,
    has_tables: metadata.hasTables,
    has_images: metadata.hasImages,
    table_count: metadata.tableCount,
    image_count: metadata.imageCount,
    author: metadata.author,
    created: metadata.creationDate,
    modified: metadata.modificationDate,
    encrypted: metadata.isEncrypted
  };

  return FrontmatterGenerator.generateMarkdown(
    options,
    deterministic,
    content,
    undefined,  // No LLM enrichment
    mode
  );
}

Constants & Defaults

import {
  DEFAULTS,
  AUDIENCE_VALUES,
  DOC_PURPOSE_VALUES,
  PROFILE_VALUES,
  BALANCED_FIELDS
} from '@bobai/frontmatter';

// Default values
DEFAULTS.profile              // 'data'
DEFAULTS.audience             // 'all'
DEFAULTS.extractionConfidence // 1.0
DEFAULTS.contentQuality       // 1.5
DEFAULTS.complexity           // 3

// Valid values for validation
AUDIENCE_VALUES    // ['all', 'beginner', 'intermediate', 'expert']
DOC_PURPOSE_VALUES // ['reference', 'tutorial', ...]
PROFILE_VALUES     // ['scraped', 'research', 'technical', ...]

Testing

npm test              # Run all tests
npm run test:watch    # Watch mode
npm run test:coverage # Coverage report

Building

npm run build   # Compile TypeScript to dist/
npm run clean   # Remove dist/

Output Example

---
profile: 'technical'
created: '2024-01-15T10:30:00.000Z'
generator: 'fss-parse-pdf'
version: '1.2.0'
title: 'API Documentation'
extraction_confidence: 1
content_quality: 1.5
source_file: '/docs/api.pdf'
word_count: 5000
page_count: 25
has_tables: true
has_images: true
author: 'Development Team'
summary: ''
tags: []
category: ''
---

# API Documentation

Content starts here...

License

MIT

Releases 1

v1.1.1 - Remove empty placeholder fields Latest

2025-11-20 10:11:13 +11:00

Languages

JavaScript 63.7%

TypeScript 36.3%

README.md

@bobai/frontmatter

Installation