mirror of
https://github.com/lobehub/lobehub.git
synced 2026-03-26 13:19:34 +07:00
✅ test: add unit tests for eval-dataset-parser (#13197)
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import * as XLSX from 'xlsx';
|
||||
|
||||
import { detectFormat } from '../src/detect';
|
||||
|
||||
/** ZIP local-file-header signature (`PK\x03\x04`); XLSX files are ZIP archives and begin with it. */
const XLSX_MAGIC = new Uint8Array([0x50, 0x4b, 0x03, 0x04]);

/**
 * Edge cases for `detectFormat`: filename-extension handling, magic-byte detection on
 * binary input, content sniffing of text delivered as bytes, and the CSV fallback.
 */
describe('detectFormat - edge cases', () => {
  /**
   * Serialises a one-cell workbook with SheetJS so the magic-byte tests run against a
   * genuine XLSX payload instead of hand-crafted bytes.
   *
   * @param type - SheetJS output container requested from `XLSX.write`.
   */
  const writeMinimalWorkbook = (type: 'array' | 'buffer') => {
    const workbook = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(workbook, XLSX.utils.aoa_to_sheet([['a']]), 'Sheet1');
    return XLSX.write(workbook, { bookType: 'xlsx', type });
  };

  it('should detect XLS by filename extension', () => {
    expect(detectFormat('', 'data.xls')).toBe('xlsx');
  });

  it('should detect XLSX magic bytes from Uint8Array without filename', () => {
    const data = new Uint8Array(writeMinimalWorkbook('array'));

    // Guard the fixture itself: the generated file must start with the ZIP signature,
    // otherwise this test would not be exercising magic-byte detection at all.
    expect(Array.from(data.subarray(0, XLSX_MAGIC.length))).toEqual(Array.from(XLSX_MAGIC));
    expect(detectFormat(data)).toBe('xlsx');
  });

  it('should detect XLSX magic bytes from Buffer without filename', () => {
    const buf = writeMinimalWorkbook('buffer') as Buffer;

    expect(detectFormat(buf)).toBe('xlsx');
  });

  it('should detect JSON from Uint8Array containing JSON array', () => {
    const data = new TextEncoder().encode('[{"a":1}]');

    expect(detectFormat(data)).toBe('json');
  });

  it('should detect JSONL from Uint8Array', () => {
    const data = new TextEncoder().encode('{"a":1}\n{"b":2}');

    expect(detectFormat(data)).toBe('jsonl');
  });

  it('should fall back to CSV from Uint8Array with CSV content', () => {
    const data = new TextEncoder().encode('col1,col2\nval1,val2');

    expect(detectFormat(data)).toBe('csv');
  });

  it('should not detect XLSX from short Uint8Array (less than 4 bytes)', () => {
    // Only the first two signature bytes ("PK"): too short to be the full magic number,
    // so the expected outcome is the CSV fallback rather than 'xlsx'.
    const data = XLSX_MAGIC.slice(0, 2);

    expect(detectFormat(data)).toBe('csv');
  });

  it('filename extension takes precedence over content', () => {
    // Content looks like JSON but the filename says CSV.
    expect(detectFormat('[{"a":1}]', 'data.csv')).toBe('csv');
  });

  it('should treat a JSON-like string that fails parse as CSV', () => {
    // Starts with '[' but is not valid JSON.
    expect(detectFormat('[not valid json')).toBe('csv');
  });

  it('should treat an object-like first line that fails parse as CSV', () => {
    // First line starts with '{' but is not valid JSON.
    expect(detectFormat('{not valid jsonl}\nmore data')).toBe('csv');
  });
});
|
||||
111
packages/eval-dataset-parser/__tests__/parseDataset.edge.test.ts
Normal file
111
packages/eval-dataset-parser/__tests__/parseDataset.edge.test.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import * as XLSX from 'xlsx';
|
||||
|
||||
import { parseDataset } from '../src';
|
||||
|
||||
/**
 * Builds an in-memory XLSX workbook containing a single sheet and returns its bytes.
 *
 * @param rows - Records handed to `XLSX.utils.json_to_sheet` to build the worksheet.
 * @param sheetName - Name given to the appended sheet; defaults to `'Sheet1'`.
 * @returns The workbook written with `bookType: 'xlsx'`, wrapped in a `Uint8Array`.
 */
function makeXLSXBuffer(rows: Record<string, unknown>[], sheetName = 'Sheet1'): Uint8Array {
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, XLSX.utils.json_to_sheet(rows), sheetName);

  // `type: 'array'` requests an ArrayBuffer-style result, which is wrapped for the parser.
  return new Uint8Array(XLSX.write(workbook, { bookType: 'xlsx', type: 'array' }));
}
|
||||
|
||||
/**
 * XLSX handling in `parseDataset`: explicit format, detection from bytes or filename,
 * rejection of string input, and preview truncation.
 */
describe('parseDataset - XLSX', () => {
  // Two-record fixture shared by the first three tests.
  const rows = [
    { question: 'Q1', answer: 'A1' },
    { question: 'Q2', answer: 'A2' },
  ];

  it('should parse XLSX from Uint8Array', () => {
    const parsed = parseDataset(makeXLSXBuffer(rows), { format: 'xlsx' });

    expect(parsed.format).toBe('xlsx');
    expect(parsed.headers).toEqual(['question', 'answer']);
    expect(parsed.totalCount).toBe(2);
  });

  it('should auto-detect XLSX from magic bytes', () => {
    // No options at all: the format has to come from the bytes themselves.
    const parsed = parseDataset(makeXLSXBuffer(rows));

    expect(parsed.format).toBe('xlsx');
  });

  it('should auto-detect XLSX by filename', () => {
    const parsed = parseDataset(makeXLSXBuffer(rows), { filename: 'test.xlsx' });

    expect(parsed.format).toBe('xlsx');
    expect(parsed.totalCount).toBe(2);
  });

  it('should throw when XLSX format is used with string input', () => {
    const parseStringAsXlsx = () => parseDataset('some string', { format: 'xlsx' });

    expect(parseStringAsXlsx).toThrow('XLSX format requires binary input');
  });

  it('should support preview for XLSX', () => {
    const tenRows = Array.from({ length: 10 }, (_, idx) => ({ id: idx, val: `v${idx}` }));
    const parsed = parseDataset(makeXLSXBuffer(tenRows), { format: 'xlsx', preview: 3 });

    // Preview limits the returned rows while totalCount still reports the full sheet.
    expect(parsed.rows).toHaveLength(3);
    expect(parsed.totalCount).toBe(10);
  });
});
|
||||
|
||||
/** Text formats delivered as a Node `Buffer` with an explicit `format` option. */
describe('parseDataset - Buffer input', () => {
  it('should parse CSV from Buffer', () => {
    const parsed = parseDataset(Buffer.from('a,b\n1,2\n3,4', 'utf8'), { format: 'csv' });

    expect(parsed.format).toBe('csv');
    expect(parsed.headers).toEqual(['a', 'b']);
    expect(parsed.totalCount).toBe(2);
  });

  it('should parse JSON from Buffer', () => {
    const parsed = parseDataset(Buffer.from('[{"x":1},{"x":2}]', 'utf8'), { format: 'json' });

    expect(parsed.format).toBe('json');
    expect(parsed.totalCount).toBe(2);
  });

  it('should parse JSONL from Buffer', () => {
    const parsed = parseDataset(Buffer.from('{"k":"v1"}\n{"k":"v2"}', 'utf8'), {
      format: 'jsonl',
    });

    expect(parsed.format).toBe('jsonl');
    expect(parsed.totalCount).toBe(2);
  });
});
|
||||
|
||||
/**
 * Failure paths of `parseDataset`, followed by two format-selection checks
 * (explicit `format` versus `'auto'`) that live in the same suite.
 */
describe('parseDataset - error cases', () => {
  it('should throw for invalid JSON content', () => {
    const parseGarbage = () => parseDataset('not-json', { format: 'json' });

    expect(parseGarbage).toThrow();
  });

  it('should throw when JSON is not an array', () => {
    const parseObject = () => parseDataset('{"a":1}', { format: 'json' });

    expect(parseObject).toThrow('JSON file must contain an array of objects');
  });

  it('should throw on invalid JSONL line', () => {
    const parseBrokenJsonl = () => parseDataset('{"a":1}\nbad-line', { format: 'jsonl' });

    // The expected message names the offending line (the second one).
    expect(parseBrokenJsonl).toThrow('Invalid JSON at line 2');
  });

  it('should use explicit format over auto-detection', () => {
    // Content looks like JSONL, but the caller forces CSV.
    const { format } = parseDataset('{"a":1}', { format: 'csv' });

    expect(format).toBe('csv');
  });

  it('should auto-detect when format is "auto"', () => {
    const { format } = parseDataset('[{"a":1}]', { format: 'auto' });

    expect(format).toBe('json');
  });
});
|
||||
227
packages/eval-dataset-parser/__tests__/parsers.test.ts
Normal file
227
packages/eval-dataset-parser/__tests__/parsers.test.ts
Normal file
@@ -0,0 +1,227 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import * as XLSX from 'xlsx';
|
||||
|
||||
import { parseCSV } from '../src/parsers/csv';
|
||||
import { parseJSON } from '../src/parsers/json';
|
||||
import { parseJSONL } from '../src/parsers/jsonl';
|
||||
import { parseXLSX } from '../src/parsers/xlsx';
|
||||
|
||||
// ─── CSV ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Unit tests for `parseCSV`: headers, preview, delimiters, quoting and numeric typing. */
describe('parseCSV', () => {
  // Header line plus three data rows.
  const basicCSV = ['name,age,city', 'Alice,30,NYC', 'Bob,25,LA', 'Carol,35,Chicago'].join('\n');

  it('should parse headers and rows correctly', () => {
    const { format, headers, rows, totalCount } = parseCSV(basicCSV);

    expect(format).toBe('csv');
    expect(headers).toEqual(['name', 'age', 'city']);
    expect(totalCount).toBe(3);
    expect(rows).toHaveLength(3);
    // `age` is expected back as a number, not the string '30'.
    expect(rows[0]).toEqual({ name: 'Alice', age: 30, city: 'NYC' });
  });

  it('should apply preview limit', () => {
    const { rows, totalCount } = parseCSV(basicCSV, { preview: 2 });

    // Only the returned rows are capped; the total still counts every data row.
    expect(rows).toHaveLength(2);
    expect(totalCount).toBe(3);
  });

  it('should handle custom delimiter', () => {
    const tabSeparated = ['name\tage', 'Alice\t30', 'Bob\t25'].join('\n');
    const { headers, rows } = parseCSV(tabSeparated, { csvDelimiter: '\t' });

    expect(headers).toEqual(['name', 'age']);
    expect(rows[0]).toMatchObject({ name: 'Alice', age: 30 });
  });

  it('should handle empty CSV (only headers)', () => {
    const { headers, rows, totalCount } = parseCSV('name,age\n');

    expect(headers).toEqual(['name', 'age']);
    expect(totalCount).toBe(0);
    expect(rows).toHaveLength(0);
  });

  it('should handle CSV with quoted fields', () => {
    // The comma inside the quoted field must not split the column.
    const { rows } = parseCSV('name,bio\nAlice,"She said, hello"\nBob,Simple');

    expect(rows[0].bio).toBe('She said, hello');
  });

  it('should dynamically type numeric values', () => {
    const { rows } = parseCSV('id,score\n1,9.5\n2,8.0');

    expect(typeof rows[0].id).toBe('number');
    expect(typeof rows[0].score).toBe('number');
  });
});
|
||||
|
||||
// ─── JSON ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Unit tests for `parseJSON`: array parsing, preview, error reporting and header extraction. */
describe('parseJSON', () => {
  // Three question/answer records serialised as one JSON array.
  const validJSON = JSON.stringify([
    { question: 'Q1', answer: 'A1' },
    { question: 'Q2', answer: 'A2' },
    { question: 'Q3', answer: 'A3' },
  ]);

  it('should parse a JSON array', () => {
    const { format, headers, rows, totalCount } = parseJSON(validJSON);

    expect(format).toBe('json');
    expect(headers).toEqual(['question', 'answer']);
    expect(totalCount).toBe(3);
    expect(rows).toHaveLength(3);
    expect(rows[1]).toEqual({ question: 'Q2', answer: 'A2' });
  });

  it('should apply preview limit', () => {
    const { rows, totalCount } = parseJSON(validJSON, { preview: 2 });

    expect(rows).toHaveLength(2);
    expect(totalCount).toBe(3);
  });

  it('should throw on invalid JSON', () => {
    const parseGarbage = () => parseJSON('not json at all');

    expect(parseGarbage).toThrow();
  });

  it('should throw when JSON is not an array', () => {
    const parseObject = () => parseJSON('{"key":"value"}');

    expect(parseObject).toThrow('JSON file must contain an array of objects');
  });

  it('should handle empty JSON array', () => {
    const { headers, rows, totalCount } = parseJSON('[]');

    expect(headers).toEqual([]);
    expect(totalCount).toBe(0);
    expect(rows).toHaveLength(0);
  });

  it('should extract headers from first object only', () => {
    // The second record carries an extra key 'c' that must not appear in the headers.
    const mixedKeys = JSON.stringify([
      { a: 1, b: 2 },
      { a: 3, c: 4 },
    ]);

    expect(parseJSON(mixedKeys).headers).toEqual(['a', 'b']);
  });
});
|
||||
|
||||
// ─── JSONL ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Unit tests for `parseJSONL`: line parsing, preview, error line numbers and blank/empty input. */
describe('parseJSONL', () => {
  // Three records, one JSON object per line, no trailing newline.
  const validJSONL = [
    '{"id":1,"text":"first"}',
    '{"id":2,"text":"second"}',
    '{"id":3,"text":"third"}',
  ].join('\n');

  it('should parse JSONL lines', () => {
    const { format, headers, rows, totalCount } = parseJSONL(validJSONL);

    expect(format).toBe('jsonl');
    expect(headers).toEqual(['id', 'text']);
    expect(totalCount).toBe(3);
    expect(rows).toHaveLength(3);
    expect(rows[0]).toEqual({ id: 1, text: 'first' });
  });

  it('should apply preview limit', () => {
    const { rows, totalCount } = parseJSONL(validJSONL, { preview: 2 });

    expect(rows).toHaveLength(2);
    expect(totalCount).toBe(3);
  });

  it('should throw on invalid JSON line with line number', () => {
    // The malformed entry sits on the second line.
    const parseBroken = () => parseJSONL('{"id":1}\nnot-json\n{"id":3}');

    expect(parseBroken).toThrow('Invalid JSON at line 2');
  });

  it('should skip blank lines', () => {
    // One empty line in the middle plus a trailing newline.
    const { rows, totalCount } = parseJSONL('{"id":1}\n\n{"id":2}\n');

    expect(totalCount).toBe(2);
    expect(rows).toHaveLength(2);
  });

  it('should handle single-line JSONL', () => {
    const { rows, totalCount } = parseJSONL('{"only":"one"}');

    expect(totalCount).toBe(1);
    expect(rows[0]).toEqual({ only: 'one' });
  });

  it('should handle empty JSONL input', () => {
    const { headers, rows, totalCount } = parseJSONL('');

    expect(totalCount).toBe(0);
    expect(rows).toHaveLength(0);
    expect(headers).toEqual([]);
  });
});
|
||||
|
||||
// ─── XLSX ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Creates a single-sheet XLSX workbook in memory and returns the serialised bytes.
 *
 * @param rows - Records converted into a worksheet via `XLSX.utils.json_to_sheet`.
 * @param sheetName - Name under which the worksheet is appended; defaults to `'Sheet1'`.
 * @returns The output of `XLSX.write` (`bookType: 'xlsx'`, `type: 'array'`) as a `Uint8Array`.
 */
function makeXLSXBuffer(rows: Record<string, unknown>[], sheetName = 'Sheet1'): Uint8Array {
  const workbook = XLSX.utils.book_new();
  const worksheet = XLSX.utils.json_to_sheet(rows);
  XLSX.utils.book_append_sheet(workbook, worksheet, sheetName);

  const serialised = XLSX.write(workbook, { bookType: 'xlsx', type: 'array' });
  return new Uint8Array(serialised);
}
|
||||
|
||||
/** Unit tests for `parseXLSX`: basic parsing, preview, and sheet selection by name or index. */
describe('parseXLSX', () => {
  // Three-record fixture used by the single-sheet tests.
  const sampleRows = [
    { name: 'Alice', score: 95 },
    { name: 'Bob', score: 87 },
    { name: 'Carol', score: 72 },
  ];

  /** Serialises a workbook with sheets 'First' ({ x: 1 }) and 'Second' ({ y: 2 }), in that order. */
  const makeTwoSheetWorkbook = (): Uint8Array => {
    const workbook = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(workbook, XLSX.utils.json_to_sheet([{ x: 1 }]), 'First');
    XLSX.utils.book_append_sheet(workbook, XLSX.utils.json_to_sheet([{ y: 2 }]), 'Second');
    return new Uint8Array(XLSX.write(workbook, { bookType: 'xlsx', type: 'array' }));
  };

  it('should parse XLSX data from Uint8Array', () => {
    const parsed = parseXLSX(makeXLSXBuffer(sampleRows));

    expect(parsed.format).toBe('xlsx');
    expect(parsed.headers).toEqual(['name', 'score']);
    expect(parsed.totalCount).toBe(3);
    expect(parsed.rows).toHaveLength(3);
    expect(parsed.rows[0].name).toBe('Alice');
    expect(parsed.metadata?.sheetName).toBe('Sheet1');
  });

  it('should apply preview limit', () => {
    const parsed = parseXLSX(makeXLSXBuffer(sampleRows), { preview: 2 });

    expect(parsed.rows).toHaveLength(2);
    expect(parsed.totalCount).toBe(3);
  });

  it('should select sheet by name', () => {
    const parsed = parseXLSX(makeTwoSheetWorkbook(), { sheet: 'Second' });

    expect(parsed.metadata?.sheetName).toBe('Second');
    expect(parsed.headers).toEqual(['y']);
    // NOTE(review): the numeric cell is expected back as the string '2' — presumably
    // parseXLSX reads formatted cell text rather than raw values; confirm against the parser.
    expect(parsed.rows[0].y).toBe('2');
  });

  it('should select sheet by index', () => {
    // The index is zero-based here: 1 resolves to the second sheet appended.
    const parsed = parseXLSX(makeTwoSheetWorkbook(), { sheet: 1 });

    expect(parsed.metadata?.sheetName).toBe('Second');
    expect(parsed.headers).toEqual(['y']);
  });

  it('should return empty result for nonexistent sheet name', () => {
    const parsed = parseXLSX(makeXLSXBuffer(sampleRows, 'Data'), { sheet: 'NonExistent' });

    expect(parsed.rows).toHaveLength(0);
    expect(parsed.headers).toEqual([]);
    expect(parsed.totalCount).toBe(0);
    // The requested name is echoed back even though no such sheet exists.
    expect(parsed.metadata?.sheetName).toBe('NonExistent');
  });

  it('should default to first sheet when no sheet option provided', () => {
    const parsed = parseXLSX(makeXLSXBuffer(sampleRows, 'MySheet'));

    expect(parsed.metadata?.sheetName).toBe('MySheet');
    expect(parsed.totalCount).toBe(3);
  });
});
|
||||
Reference in New Issue
Block a user