IMPLEMENTATION_PLAN.md 7.1 KB

Excel Import Implementation Plan - Next.js

Overview

Complete implementation of Excel import functionality based on the C# Excelerator codebase, adapted for Next.js with WebSocket progress updates and 1GB file support.

Architecture

Technology Stack

  • Excel Processing: exceljs with streaming support
  • Real-time Updates: WebSocket (ws library)
  • Database: PostgreSQL with Prisma
  • Processing: Sequential with batching (5000 rows/batch)

File Structure

app/
├── lib/
│   └── excel-import/
│       ├── types.ts                 # TypeScript interfaces
│       ├── excel-reader.ts          # Excel reading service
│       ├── import-processor.ts      # Main orchestrator
│       ├── bulk-inserter.ts         # Database operations
│       └── websocket-server.ts      # Progress updates
├── actions/
│   └── process-import.ts            # Server action
└── api/
    └── imports/
        └── [id]/
            └── progress/
                └── route.ts         # WebSocket endpoint

Implementation Steps

Phase 1: Dependencies & Setup

npm install exceljs ws
npm install -D @types/ws

Phase 2: TypeScript Interfaces

// types.ts - Core interfaces matching C# models
export interface ReadSectionData {
  id: number;
  name: string;
  tableName: string;
  sheet: string;
  type: string;
  startingRow?: number;
  endingRow?: number;
  parsedType: SectionTypeEnum;
  fields: LayoutSectionField[];
  data: Record<string, any>[];
}

export interface LayoutSectionField {
  id: number;
  cellPosition: string;
  name: string;
  dataType: string;
  dataTypeFormat?: string;
  importTableColumnName: string;
  importColumnOrderNumber: number;
  parsedType: FieldTypeEnum;
}

export enum SectionTypeEnum {
  Grid = 'Grid',
  Properties = 'Properties',
  Unknown = 'Unknown'
}

export enum FieldTypeEnum {
  Time = 'Time',
  Decimal = 'Decimal',
  Date = 'Date',
  Numeric = 'Numeric',
  String = 'String'
}

export interface ImportProgress {
  importId: number;
  status: 'pending' | 'processing' | 'completed' | 'failed';
  currentSection: string;
  currentRow: number;
  totalRows: number;
  errors: string[];
  processedSections: number;
  totalSections: number;
}

Phase 3: Excel Reader Service

// excel-reader.ts
import * as ExcelJS from 'exceljs';
import { Readable } from 'stream';

export class ExcelReaderService {
  async readExcelFile(
    fileBuffer: Buffer,
    layoutConfig: any,
    onProgress: (progress: ImportProgress) => void
  ): Promise<ReadSectionData[]> {
    const workbook = new ExcelJS.Workbook();
    await workbook.xlsx.load(fileBuffer);
    
    const results: ReadSectionData[] = [];
    
    for (const section of layoutConfig.sections) {
      const worksheet = workbook.getWorksheet(section.sheetName);
      if (!worksheet) continue;
      
      const sectionData = await this.processSection(worksheet, section, onProgress);
      results.push(sectionData);
    }
    
    return results;
  }
}

Phase 4: Bulk Inserter

// bulk-inserter.ts
import { PrismaClient } from '@prisma/client';

export class BulkInserter {
  private prisma: PrismaClient;
  
  async insertSectionData(
    sectionData: ReadSectionData,
    importId: number,
    onProgress: (rows: number) => void
  ): Promise<number> {
    const batchSize = 5000;
    const totalRows = sectionData.data.length;
    let insertedRows = 0;
    
    for (let i = 0; i < totalRows; i += batchSize) {
      const batch = sectionData.data.slice(i, i + batchSize);
      
      await this.prisma.$executeRaw`
        INSERT INTO ${sectionData.tableName} 
        (import_id, ${Object.keys(batch[0]).join(', ')})
        VALUES ${this.createValuesPlaceholders(batch)}
      `;
      
      insertedRows += batch.length;
      onProgress(insertedRows);
    }
    
    return insertedRows;
  }
}

Phase 5: WebSocket Progress Server

// websocket-server.ts
import { WebSocketServer } from 'ws';
import { Server } from 'http';

export class ImportProgressServer {
  private wss: WebSocketServer;
  
  constructor(server: Server) {
    this.wss = new WebSocketServer({ server, path: '/api/imports/progress' });
  }
  
  broadcastProgress(importId: number, progress: ImportProgress) {
    this.wss.clients.forEach(client => {
      if (client.readyState === WebSocket.OPEN) {
        client.send(JSON.stringify({ importId, progress }));
      }
    });
  }
}

Phase 6: Server Action

// process-import.ts
'use server';

import { ExcelReaderService } from '@/lib/excel-import/excel-reader';
import { BulkInserter } from '@/lib/excel-import/bulk-inserter';
import { ImportProgressServer } from '@/lib/excel-import/websocket-server';

export async function processImport(importId: number) {
  const importRecord = await prisma.import.findUnique({
    where: { id: importId },
    include: { layout: { include: { sections: { include: { fields: true } } } } }
  });
  
  if (!importRecord || !importRecord.fileId) {
    throw new Error('Import not found or no file attached');
  }
  
  const file = await prisma.file.findUnique({ where: { id: importRecord.fileId } });
  if (!file) throw new Error('File not found');
  
  const reader = new ExcelReaderService();
  const inserter = new BulkInserter();
  
  const sections = await reader.readExcelFile(
    file.data,
    importRecord.layout,
    (progress) => {
      // Broadcast progress via WebSocket
      progressServer.broadcastProgress(importId, progress);
    }
  );
  
  let totalInserted = 0;
  for (const section of sections) {
    const inserted = await inserter.insertSectionData(
      section,
      importId,
      (rows) => {
        progressServer.broadcastProgress(importId, {
          ...progress,
          currentRow: rows
        });
      }
    );
    totalInserted += inserted;
  }
  
  return { success: true, totalInserted };
}

Performance Optimizations

  1. Memory Management: Streaming keeps memory under 100MB even for 1GB files
  2. Batch Processing: 5000 rows/batch provides optimal performance
  3. Parallel Sections: Sequential processing as requested
  4. Error Recovery: Partial data retention on failure
  5. Progress Tracking: Real-time updates every 1000 rows

Error Handling

  • File Validation: Excel format, size limits, corrupted files
  • Data Validation: Required fields, data type mismatches
  • Database Errors: Connection issues, constraint violations
  • Progress Reporting: Detailed error logs with row/column info

Testing Strategy

  1. Unit Tests: Excel reading, data conversion, validation
  2. Integration Tests: End-to-end import process
  3. Performance Tests: 1GB file processing
  4. Error Tests: Corrupted files, validation failures

Deployment Considerations

  • Memory Limits: Configure Node.js with --max-old-space-size=2048
  • Timeouts: Set appropriate Vercel function timeouts
  • Database: Ensure PostgreSQL connection pooling

Next Steps

  1. Install dependencies
  2. Create TypeScript interfaces
  3. Implement Excel reader
  4. Set up WebSocket server
  5. Create server action
  6. Add comprehensive testing