database-excel-reader.ts 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. /* eslint-disable @typescript-eslint/no-explicit-any */
  2. import * as XLSX from 'xlsx';
  3. import { ReadSectionData, LayoutSectionField, SectionTypeEnum, FieldTypeEnum, ImportProgress } from './types';
  4. import { prisma } from '@/lib/prisma';
  5. // Simple logger utility for debugging
  6. const logger = {
  7. debug: (message: string, ...args: any[]) => {
  8. console.debug(`[DatabaseExcelReaderService] ${new Date().toISOString()} - ${message}`, ...args);
  9. },
  10. info: (message: string, ...args: any[]) => {
  11. console.info(`[DatabaseExcelReaderService] ${new Date().toISOString()} - ${message}`, ...args);
  12. },
  13. warn: (message: string, ...args: any[]) => {
  14. console.warn(`[DatabaseExcelReaderService] ${new Date().toISOString()} - ${message}`, ...args);
  15. },
  16. error: (message: string, ...args: any[]) => {
  17. console.error(`[DatabaseExcelReaderService] ${new Date().toISOString()} - ${message}`, ...args);
  18. }
  19. };
  20. export class DatabaseExcelReaderService {
  21. async readExcelFromDatabase(
  22. fileId: string,
  23. layoutConfig: any,
  24. onProgress: (progress: ImportProgress) => void
  25. ): Promise<ReadSectionData[]> {
  26. logger.info('Starting Excel file import from database', {
  27. fileId,
  28. layoutConfigSections: layoutConfig.sections?.length || 0
  29. });
  30. const startTime = Date.now();
  31. try {
  32. // Fetch file from database
  33. const fileRecord = await prisma.file.findUnique({
  34. where: { id: fileId }
  35. });
  36. if (!fileRecord) {
  37. throw new Error(`File with ID ${fileId} not found in database`);
  38. }
  39. logger.info('File retrieved from database', {
  40. filename: fileRecord.filename,
  41. size: fileRecord.size,
  42. mimetype: fileRecord.mimetype
  43. });
  44. // Convert Buffer to ArrayBuffer for xlsx library
  45. const arrayBuffer = fileRecord.data.buffer.slice(
  46. fileRecord.data.byteOffset,
  47. fileRecord.data.byteOffset + fileRecord.data.byteLength
  48. );
  49. logger.debug('Loading Excel workbook from buffer...');
  50. const workbook = XLSX.read(arrayBuffer, { type: 'array' });
  51. logger.info('Excel workbook loaded successfully from database', {
  52. worksheets: workbook.SheetNames.map(name => ({
  53. name,
  54. rowCount: XLSX.utils.sheet_to_json(workbook.Sheets[name]).length
  55. }))
  56. });
  57. const results: ReadSectionData[] = [];
  58. const totalSections = layoutConfig.sections?.length || 0;
  59. logger.info('Processing Excel import from database', { totalSections });
  60. // Initialize progress
  61. onProgress({
  62. importId: 0,
  63. status: 'processing',
  64. currentSection: '',
  65. currentRow: 0,
  66. totalRows: 0,
  67. errors: [],
  68. processedSections: 0,
  69. totalSections
  70. });
  71. for (let sectionIndex = 0; sectionIndex < totalSections; sectionIndex++) {
  72. const section = layoutConfig.sections[sectionIndex];
  73. logger.debug(`Processing section ${sectionIndex + 1}/${totalSections}`, {
  74. sectionName: section.name,
  75. sheetName: section.sheetName,
  76. startingRow: section.startingRow,
  77. endingRow: section.endingRow
  78. });
  79. const worksheet = workbook.Sheets[section.sheetName];
  80. if (!worksheet) {
  81. const error = `Worksheet '${section.sheetName}' not found`;
  82. logger.warn(error, { availableWorksheets: workbook.SheetNames });
  83. onProgress({
  84. importId: 0,
  85. status: 'processing',
  86. currentSection: section.name,
  87. currentRow: 0,
  88. totalRows: 0,
  89. errors: [error],
  90. processedSections: sectionIndex + 1,
  91. totalSections
  92. });
  93. continue;
  94. }
  95. const sectionData = await this.processSectionFromWorksheet(
  96. worksheet,
  97. section,
  98. sectionIndex,
  99. totalSections,
  100. onProgress
  101. );
  102. results.push(sectionData);
  103. logger.info(`Section ${section.name} processed successfully`, {
  104. rowsProcessed: sectionData.data.length,
  105. fields: sectionData.fields.length
  106. });
  107. }
  108. const totalTime = Date.now() - startTime;
  109. logger.info('Excel file import from database completed', {
  110. totalSections: results.length,
  111. totalRows: results.reduce((sum, section) => sum + section.data.length, 0),
  112. totalTimeMs: totalTime
  113. });
  114. return results;
  115. } catch (error) {
  116. logger.error('Error reading Excel file from database', {
  117. error: error instanceof Error ? error.message : String(error),
  118. stack: error instanceof Error ? error.stack : undefined
  119. });
  120. throw error;
  121. }
  122. }
  123. async readExcelFromBuffer(
  124. buffer: Buffer,
  125. layoutConfig: any,
  126. onProgress: (progress: ImportProgress) => void
  127. ): Promise<ReadSectionData[]> {
  128. logger.info('Starting Excel file import from buffer', {
  129. bufferSize: buffer.length,
  130. layoutConfigSections: layoutConfig.sections?.length || 0
  131. });
  132. const startTime = Date.now();
  133. try {
  134. // Convert Buffer to ArrayBuffer for xlsx library
  135. const arrayBuffer = buffer.buffer.slice(
  136. buffer.byteOffset,
  137. buffer.byteOffset + buffer.byteLength
  138. );
  139. logger.debug('Loading Excel workbook from buffer...');
  140. const workbook = XLSX.read(arrayBuffer, { type: 'array' });
  141. logger.info('Excel workbook loaded successfully from buffer', {
  142. worksheets: workbook.SheetNames.map(name => ({
  143. name,
  144. rowCount: XLSX.utils.sheet_to_json(workbook.Sheets[name]).length
  145. }))
  146. });
  147. const results: ReadSectionData[] = [];
  148. const totalSections = layoutConfig.sections?.length || 0;
  149. logger.info('Processing Excel import from buffer', { totalSections });
  150. // Initialize progress
  151. onProgress({
  152. importId: 0,
  153. status: 'processing',
  154. currentSection: '',
  155. currentRow: 0,
  156. totalRows: 0,
  157. errors: [],
  158. processedSections: 0,
  159. totalSections
  160. });
  161. for (let sectionIndex = 0; sectionIndex < totalSections; sectionIndex++) {
  162. const section = layoutConfig.sections[sectionIndex];
  163. logger.debug(`Processing section ${sectionIndex + 1}/${totalSections}`, {
  164. sectionName: section.name,
  165. sheetName: section.sheetName,
  166. startingRow: section.startingRow,
  167. endingRow: section.endingRow
  168. });
  169. const worksheet = workbook.Sheets[section.sheetName];
  170. if (!worksheet) {
  171. const error = `Worksheet '${section.sheetName}' not found`;
  172. logger.warn(error, { availableWorksheets: workbook.SheetNames });
  173. onProgress({
  174. importId: 0,
  175. status: 'processing',
  176. currentSection: section.name,
  177. currentRow: 0,
  178. totalRows: 0,
  179. errors: [error],
  180. processedSections: sectionIndex + 1,
  181. totalSections
  182. });
  183. continue;
  184. }
  185. const sectionData = await this.processSectionFromWorksheet(
  186. worksheet,
  187. section,
  188. sectionIndex,
  189. totalSections,
  190. onProgress
  191. );
  192. results.push(sectionData);
  193. logger.info(`Section ${section.name} processed successfully`, {
  194. rowsProcessed: sectionData.data.length,
  195. fields: sectionData.fields.length
  196. });
  197. }
  198. const totalTime = Date.now() - startTime;
  199. logger.info('Excel file import from buffer completed', {
  200. totalSections: results.length,
  201. totalRows: results.reduce((sum, section) => sum + section.data.length, 0),
  202. totalTimeMs: totalTime
  203. });
  204. return results;
  205. } catch (error) {
  206. logger.error('Error reading Excel file from buffer', {
  207. error: error instanceof Error ? error.message : String(error),
  208. stack: error instanceof Error ? error.stack : undefined
  209. });
  210. throw error;
  211. }
  212. }
  213. private async processSectionFromWorksheet(
  214. worksheet: XLSX.WorkSheet,
  215. section: any,
  216. sectionIndex: number,
  217. totalSections: number,
  218. onProgress: (progress: ImportProgress) => void
  219. ): Promise<ReadSectionData> {
  220. const sectionStartTime = Date.now();
  221. logger.info(`Starting section processing from worksheet`, {
  222. sectionName: section.name,
  223. sheetName: section.sheetName,
  224. sectionIndex: sectionIndex + 1,
  225. totalSections
  226. });
  227. const startingRow = section.startingRow || 2; // Default to 2 to skip header
  228. const endingRow = section.endingRow || Infinity;
  229. logger.debug('Section configuration', {
  230. sectionName: section.name,
  231. startingRow,
  232. endingRow,
  233. fieldsCount: section.fields?.length || 0
  234. });
  235. // Convert worksheet to JSON array
  236. const worksheetData = XLSX.utils.sheet_to_json(worksheet, { header: 1 }) as any[][];
  237. // Process data rows
  238. const data: Record<string, any>[] = [];
  239. const totalRows = Math.min(endingRow, worksheetData.length) - startingRow + 1;
  240. let processedRows = 0;
  241. let skippedRows = 0;
  242. for (let rowNum = startingRow; rowNum <= Math.min(endingRow, worksheetData.length); rowNum++) {
  243. const row = worksheetData[rowNum - 1]; // Convert to 0-based index
  244. if (!row || row.every(cell => cell === null || cell === undefined || cell === '')) {
  245. skippedRows++;
  246. logger.debug(`Skipping empty row ${rowNum}`);
  247. continue;
  248. }
  249. const rowData: Record<string, any> = {};
  250. let fieldsProcessed = 0;
  251. // Map cell values based on field configuration
  252. for (const field of section.fields || []) {
  253. try {
  254. const cellAddress = this.parseCellAddress(field.cellPosition, rowNum);
  255. const cellValue = row[cellAddress.col - 1]; // Convert to 0-based index
  256. logger.debug(`Processing field`, {
  257. fieldName: field.name,
  258. cellPosition: field.cellPosition,
  259. cellAddress,
  260. rawValue: cellValue,
  261. rowNum
  262. });
  263. if (cellValue !== null && cellValue !== undefined && cellValue !== '') {
  264. const value = this.convertCellValue(
  265. field.dataType,
  266. field.dataTypeFormat,
  267. cellValue,
  268. field.parsedType || FieldTypeEnum.String
  269. );
  270. logger.debug(`Value converted`, {
  271. fieldName: field.name,
  272. originalValue: cellValue,
  273. convertedValue: value,
  274. fieldType: field.parsedType || FieldTypeEnum.String
  275. });
  276. // Map to the correct column name for Prisma model
  277. const columnName = field.importTableColumnName;
  278. rowData[columnName] = value;
  279. fieldsProcessed++;
  280. }
  281. } catch (error) {
  282. logger.error(`Error processing field ${field.name} at row ${rowNum}`, {
  283. error: error instanceof Error ? error.message : String(error),
  284. field,
  285. rowNum
  286. });
  287. }
  288. }
  289. // Only add non-empty rows
  290. if (Object.keys(rowData).length > 0) {
  291. data.push(rowData);
  292. processedRows++;
  293. if (processedRows <= 5 || processedRows % 100 === 0) {
  294. logger.debug(`Row processed`, {
  295. rowNum,
  296. fieldsProcessed,
  297. rowDataKeys: Object.keys(rowData),
  298. dataLength: data.length
  299. });
  300. }
  301. } else {
  302. logger.debug(`Skipping row with no valid data`, { rowNum });
  303. }
  304. // Update progress every 100 rows
  305. if (rowNum % 100 === 0 || rowNum === Math.min(endingRow, worksheetData.length)) {
  306. onProgress({
  307. importId: 0,
  308. status: 'processing',
  309. currentSection: section.name,
  310. currentRow: rowNum - startingRow + 1,
  311. totalRows,
  312. errors: [],
  313. processedSections: sectionIndex,
  314. totalSections
  315. });
  316. }
  317. }
  318. const sectionTime = Date.now() - sectionStartTime;
  319. logger.info(`Section processing completed`, {
  320. sectionName: section.name,
  321. processedRows,
  322. skippedRows,
  323. totalRows,
  324. dataRows: data.length,
  325. processingTimeMs: sectionTime
  326. });
  327. const result = {
  328. id: section.id || 0,
  329. name: section.name || '',
  330. tableName: section.tableName || '',
  331. sheet: section.sheetName || '',
  332. type: section.type || '',
  333. startingRow,
  334. endingRow,
  335. parsedType: this.mapSectionType(section.type),
  336. fields: this.mapFields(section.fields || []),
  337. data
  338. };
  339. logger.debug('Section result', {
  340. sectionName: section.name,
  341. resultSummary: {
  342. id: result.id,
  343. name: result.name,
  344. tableName: result.tableName,
  345. dataRows: result.data.length,
  346. fields: result.fields.length
  347. }
  348. });
  349. return result;
  350. }
  351. private parseCellAddress(cellPosition: string, rowNumber: number): { row: number; col: number } {
  352. logger.debug(`Parsing cell address: ${cellPosition}`);
  353. let match = cellPosition.match(/([A-Z]+)(\d+)/);
  354. if (!match) {
  355. logger.warn(`Invalid cell position format: ${cellPosition}, trying to add row number ${rowNumber}`);
  356. const appendedCellPosition = `${cellPosition}${rowNumber}`;
  357. match = appendedCellPosition.match(/([A-Z]+)(\d+)/);
  358. if (!match) {
  359. logger.warn(`Invalid cell position format: ${appendedCellPosition}, using default 1,1`);
  360. return { row: 1, col: 1 };
  361. }
  362. }
  363. const col = match[1].charCodeAt(0) - 'A'.charCodeAt(0) + 1;
  364. const row = parseInt(match[2]);
  365. logger.debug(`Parsed cell address`, {
  366. original: cellPosition,
  367. row,
  368. col
  369. });
  370. return { row, col };
  371. }
  372. private mapSectionType(type: string): SectionTypeEnum {
  373. logger.debug(`Mapping section type: ${type}`);
  374. const mappedType = (() => {
  375. switch (type?.toLowerCase()) {
  376. case 'grid':
  377. return SectionTypeEnum.Grid;
  378. case 'properties':
  379. return SectionTypeEnum.Properties;
  380. default:
  381. return SectionTypeEnum.Unknown;
  382. }
  383. })();
  384. logger.debug(`Section type mapped`, {
  385. originalType: type,
  386. mappedType: SectionTypeEnum[mappedType]
  387. });
  388. return mappedType;
  389. }
  390. private mapFields(fields: any[]): LayoutSectionField[] {
  391. logger.debug(`Mapping ${fields.length} fields`);
  392. const mappedFields = fields.map((field, index) => {
  393. const mappedField = {
  394. id: field.id || index,
  395. cellPosition: field.cellPosition || '',
  396. name: field.name || '',
  397. dataType: field.dataType || 'string',
  398. dataTypeFormat: field.dataTypeFormat,
  399. importTableColumnName: field.importTableColumnName || field.name || `column_${index}`,
  400. importColumnOrderNumber: field.importColumnOrderNumber || index,
  401. parsedType: this.mapFieldType(field.dataType)
  402. };
  403. logger.debug(`Field mapped`, {
  404. index,
  405. originalName: field.name,
  406. mappedName: mappedField.name,
  407. cellPosition: mappedField.cellPosition,
  408. parsedType: FieldTypeEnum[mappedField.parsedType]
  409. });
  410. return mappedField;
  411. });
  412. return mappedFields;
  413. }
  414. private mapFieldType(dataType: string): FieldTypeEnum {
  415. const type = dataType?.toLowerCase();
  416. const mappedType = (() => {
  417. switch (type) {
  418. case 'time':
  419. return FieldTypeEnum.Time;
  420. case 'decimal':
  421. case 'number':
  422. case 'float':
  423. return FieldTypeEnum.Decimal;
  424. case 'date':
  425. return FieldTypeEnum.Date;
  426. case 'int':
  427. case 'integer':
  428. case 'numeric':
  429. return FieldTypeEnum.Numeric;
  430. default:
  431. return FieldTypeEnum.String;
  432. }
  433. })();
  434. logger.debug(`Field type mapped`, {
  435. originalDataType: dataType,
  436. mappedType: FieldTypeEnum[mappedType]
  437. });
  438. return mappedType;
  439. }
  440. private convertCellValue(dataType: string, dataTypeFormat: string | undefined, value: any, fieldType: FieldTypeEnum): any {
  441. if (value === null || value === undefined) {
  442. logger.debug(`Converting null/undefined value to null`, { fieldType: FieldTypeEnum[fieldType] });
  443. return null;
  444. }
  445. logger.debug(`Converting cell value`, {
  446. originalValue: value,
  447. originalType: typeof value,
  448. targetFieldType: FieldTypeEnum[fieldType]
  449. });
  450. const convertedValue = (() => {
  451. if (dataType === "DATE")
  452. return XLSX.SSF.format(dataTypeFormat || 'yyyy-mm-dd', value);
  453. switch (fieldType) {
  454. case FieldTypeEnum.Time:
  455. if (typeof value === 'number') {
  456. // Excel time is fraction of a day
  457. const result = value * 24 * 60 * 60 * 1000; // Convert to milliseconds
  458. logger.debug(`Time conversion`, { original: value, converted: result });
  459. return result;
  460. }
  461. return value;
  462. case FieldTypeEnum.Decimal:
  463. const decimalResult = parseFloat(value.toString()) || 0;
  464. logger.debug(`Decimal conversion`, { original: value, converted: decimalResult });
  465. return decimalResult;
  466. case FieldTypeEnum.Date:
  467. if (typeof value === 'number') {
  468. // Excel date is days since 1900-01-01
  469. const excelEpoch = new Date(1900, 0, 1);
  470. const dateResult = new Date(excelEpoch.getTime() + (value - 1) * 24 * 60 * 60 * 1000);
  471. logger.debug(`Date conversion`, { original: value, converted: dateResult });
  472. return dateResult;
  473. }
  474. const dateResult = new Date(value);
  475. logger.debug(`Date conversion from string`, { original: value, converted: dateResult });
  476. return dateResult;
  477. case FieldTypeEnum.Numeric:
  478. const numericResult = parseInt(value.toString()) || 0;
  479. logger.debug(`Numeric conversion`, { original: value, converted: numericResult });
  480. return numericResult;
  481. case FieldTypeEnum.String:
  482. default:
  483. const stringResult = value.toString();
  484. logger.debug(`String conversion`, { original: value, converted: stringResult });
  485. return stringResult;
  486. }
  487. })();
  488. return convertedValue;
  489. }
  490. }