Code examplesRaw file parsingFunctions on Objects

Functions on Objects

Typescript

Parse and process Excel files in Typescript

How do I parse and process Excel files in Typescript?

This code uses the ExcelJS library to parse and process Excel files in Typescript. It reads an Excel file, processes the workbook, validates the header, and unpivots the data to create a new data object.

Copied!
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 import { OntologyEditFunction, Timestamp } from "@foundry/functions-api"; import { Uuid } from "@foundry/functions-utils"; import { Objects, Upload, SampleData } from "@foundry/ontology-api"; import { Workbook } from "exceljs"; interface DataRow { target: string; unit: string; property: string; value: number; uploaded_at: Timestamp; upload_key: string; } export class ParseExcel { @OntologyEditFunction() public async addData(upload: Upload): Promise<void> { try { const arrayBuffer = await this.getArrayBufferFromAttachment(upload); // Create a workbook from the arrayBuffer const workbook = new Workbook(); await workbook.xlsx.load(arrayBuffer); const meltedData = this.processWorkbook(workbook, upload.uploadedAt!, upload.uploadKey!); const version = this.generateVersion(upload.title!, upload.uploadedAt!); meltedData.forEach(row => { this.createNewData(row, version); }); upload.status = 'Success' // Update Upload Version upload.version = version; } catch (error: any) { upload.status = 'Failed'; upload.errorMessage = error.message; } } /** * Validate header to make sure it has all necessary headers */ private validateHeader(header: string[]): string | null { const target = 'target'; const yIntercept = 'Y_intercept'; if (header[1] !== target || header[2] !== yIntercept) { // console.log(header) return 'The header must start with target and Y_intercept in the first two columns.'; } const regex = /^[a-zA-Z0-9_]+$/; for (const columnName of header) { if (!regex.test(columnName)) { return `The header contains invalid characters. Only English letters and underscores are allowed. Invalid column name: ${columnName}`; } } return null; } private createNewData(row: DataRow, version: string) { const newData = Objects.create()._sampleData(Uuid.random()); newData.title = `${row.target} ${row.unit} ${row.property} Data`; newData.value = row.value; newData.target = row.target; newData.unit = row.unit; newData.version = version; newData.property_ = row.property; newData.uploadKey = row.upload_key; newData.propertyKey = this.generatePropertyKey(row.unit, row.property); return newData; } private generatePropertyKey(unit: string, property: string){ return `Sample_Property_Key_${unit}_${property}` } private generateVersion(unit: string, uploaded_at: Timestamp): string { // Format the timestamp as YYYYMMDD_HHMMSS const formattedDate = uploaded_at.toISOString().slice(0, 10).replace(/-/g, ""); const formattedTime = uploaded_at.toISOString().slice(11, 19).replace(/:/g, ""); // Create the version string using the unit and the formatted timestamp const version = `${unit}_${formattedDate}_${formattedTime}`; return version; } /** * Reads ArrayBuffer from the attachment in the given Upload object. * @param upload The Upload object containing the attachment. */ private async getArrayBufferFromAttachment(upload: Upload): Promise<ArrayBuffer> { const attachmentBlob = await upload.attachment!.readAsync(); return attachmentBlob.arrayBuffer(); } /** * Processes the workbook and returns a 2D array in DataFrame format. * @param workbook The Workbook object to process. */ private processWorkbook(workbook: Workbook, uploadedAt: Timestamp, uploadKey: string): DataRow[] { const allMeltedData: DataRow[] = []; workbook.eachSheet((sheet) => { const sheetUnit = sheet.name; // Process header let headerProcessed = false; let sheetHeader: string[] = []; const sheetData: any[][] = []; // Process rows of each sheet sheet.eachRow((row, rowIndex) => { if (row.values) { const rowValues = row.values as (string | number)[]; // If rowIndex is 1, handle it as a header if (rowIndex === 1) { sheetHeader = rowValues.filter(value => value !== null && typeof value === 'string') as string[]; sheetHeader.unshift('unit'); const validationResult = this.validateHeader(sheetHeader); if (validationResult) { throw new Error(validationResult); } headerProcessed = true; } else { const newRowValues = this.processRow(rowValues, sheetHeader); newRowValues.unshift(sheetUnit); sheetData.push(newRowValues); } } }); // If the header was not processed in the rows, add it manually if (!headerProcessed) { sheetHeader.unshift('unit'); const validationResult = this.validateHeader(sheetHeader); if (validationResult) { throw new Error(validationResult); } } const meltedData = this.unpivotData(sheetData, sheetUnit, uploadedAt, uploadKey, sheetHeader); allMeltedData.push(...meltedData); }); return allMeltedData; } /** * Processes a row from the workbook and returns the new row data. * @param rowValues The values of the row to process. * @param header The header array. */ private processRow(rowValues: (string | number)[], header: (string | number)[]): any[] { const newRowValues = Array(header.length).fill(0); // Fill with 0 rowValues.forEach((value, index) => { if (index > 0 && header[index - 1] !== undefined) { newRowValues[index - 1] = value; } }); return newRowValues.map((value, index) => { if (header[index] !== 'target' && header[index] !== 'unit') { return typeof value === 'string' ? parseFloat(value) : value; } return value; }); } /** * Unpivots the data and returns an array of MeltedDataRow objects. * @param data The 2D array in DataFrame format to unpivot. */ private unpivotData(data: any[][], unit: string, uploadedAt: Timestamp, uploadKey: string, header: string[]): DataRow[] { const idVars = ["unit", "target"]; const meltedData: DataRow[] = []; data.forEach(row => { const idValues = row.slice(0, idVars.length); row.slice(idVars.length).forEach((value, index) => { if (value !== 0) { const newRow: DataRow = { target: idValues[1], unit: unit, property: header[idVars.length + index] as string, // Use header to get the property name value: value, uploaded_at: uploadedAt, upload_key: uploadKey, }; meltedData.push(newRow); } }); }); return meltedData; } }
  • Date submitted: 2024-05-23
  • Tags: excel, typescript, functions on objects, file upload, dataframe