Script to reformat a CSV file with multiple columns to a single column CSV with column header above each value

Script to reformat a CSV file with multiple columns to a single column CSV with column header above each value

8
NewbieNewbie
8

    Mar 15, 2020#1

    Hi Mofi,

    I need your expert advise. I need to merge all rows into one column which is in Excel format using UltraEdit.

    Each and every row must contain the header when merged. Please find attachment for you attention.

    TQ
    FORMULATION.xlsx (9.88 KiB)   0

    6,687586
    Grand MasterGrand Master
    6,687586

      Mar 16, 2020#2

      UltraEdit is a text editor. Therefore it can be used only to reformat a CSV file (text file), but not a binary Microsoft Excel file.

      The script below should do the task for CSV files which can be loaded completely into memory of JavaScript core engine.

      Code: Select all

      // Character code value of separator. Use 0x2C for comma, 0x3B for
      // semicolon, 0x7C for vertical bar, or 0x09 for a horizontal tab.
      var nSeparatorCode = 0x2C;
      
      // Write the reformatted lines into a new file or overwrite active file.
      var bNewFile = true;
      
      function ProcessFile ()
      {
         // Parse the selected CSV data character by character to support also one or
         // more delimiters as well as newline characters within a double quoted value.
      
         var bNewDataRow = false;     // For detection of a new data row.
         var bWithinQuotes = false;   // For detection of double quoted values.
         var nCharCode;               // Character code of current character.
         var nCharIndex = 0;          // Index of current character in character stream.
         var nColumnIndex = 0;        // Column index in current data row.
         var nValueStart = 0;         // Start index of current value in character stream.
         var asColumnHeaders = [];    // Array with all strings of the column headers.
         var asNewRows = [];          // Array with all new rows respectively values.
      
         do    // Skip all carriage returns and line-feeds at top of the CSV file.
         {
            nCharCode = UltraEdit.activeDocument.selection.charCodeAt(nCharIndex);
            if ((nCharCode != 0x0D) && (nCharCode != 0x0A))
            {
               nValueStart = nCharIndex;
               break;
            }
         }
         while (++nCharIndex < UltraEdit.activeDocument.selection.length);
      
         // Has the file just empty lines?
         if (nCharIndex >= UltraEdit.activeDocument.selection.length) return;
      
         // Parse the header row with the column headers.
         do
         {
            nCharCode = UltraEdit.activeDocument.selection.charCodeAt(nCharIndex);
            if (!bWithinQuotes)
            {
               // Is the current character the separator?
               if (nCharCode == nSeparatorCode)
               {
                  asColumnHeaders.push(UltraEdit.activeDocument.selection.substring(nValueStart,nCharIndex));
                  nValueStart = nCharIndex + 1;
               }
               // Is the current character a carriage return or a line-feed?
               else if ((nCharCode == 0x0D) || (nCharCode == 0x0A))
               {
                  if (!bNewDataRow)
                  {
                     bNewDataRow = true;
                     asColumnHeaders.push(UltraEdit.activeDocument.selection.substring(nValueStart,nCharIndex));
                  }
                  nValueStart = nCharIndex + 1;
                  continue;
               }
               else if (nCharCode == 0x22)
               {
                  bWithinQuotes = true;
               }
               // Is this the first character after the header row?
               if (bNewDataRow)
               {
                  bWithinQuotes = false;
                  bNewDataRow = false;
                  break;
               }
            }
            else  // Everything enclosed in double quotes must be interpreted
            {     // as part of data value including delimiter and newline
                  // character until next double quote character is found.
               if (nCharCode == 0x22)
               {
                  bWithinQuotes = false;
               }
            }
         }
         while (++nCharIndex < UltraEdit.activeDocument.selection.length);
      
         // Does the CSV file contain only one row?
         if (nCharIndex >= UltraEdit.activeDocument.selection.length)
         {
            // Does the file not end with newline characters?
            if (!bNewDataRow)
            {
               asColumnHeaders.push(UltraEdit.activeDocument.selection.substring(nValueStart,nCharIndex));
            }
            // Append to array of column headers an empty string to have
            // the last line in file finally also with a line termination.
            asColumnHeaders.push("");
            // The single row is transformed to a single column.
            asNewRows = asColumnHeaders;
         }
         else
         {
            // Parse all the data rows and create the new rows in memory
            // with the header above according to current data column.
            do
            {
               nCharCode = UltraEdit.activeDocument.selection.charCodeAt(nCharIndex);
               if (!bWithinQuotes)
               {
                  // Is the current character the separator?
                  if (nCharCode == nSeparatorCode)
                  {
                     if (nColumnIndex < asColumnHeaders.length)
                     {
                        asNewRows.push(asColumnHeaders[nColumnIndex]);
                        nColumnIndex++;
                     }
                     else
                     {
                        nColumnIndex++;
                        asNewRows.push("Column "+nColumnIndex.toString(10));
                     }
                     asNewRows.push(UltraEdit.activeDocument.selection.substring(nValueStart,nCharIndex));
                     nValueStart = nCharIndex + 1;
                  }
                  // Is the current character a carriage return or a line-feed?
                  else if ((nCharCode == 0x0D) || (nCharCode == 0x0A))
                  {
                     if (!bNewDataRow)
                     {
                        bNewDataRow = true;
                        if (nColumnIndex < asColumnHeaders.length)
                        {
                           asNewRows.push(asColumnHeaders[nColumnIndex]);
                           nColumnIndex++;
                        }
                        else
                        {
                           nColumnIndex++;
                           asNewRows.push("Column "+nColumnIndex.toString(10));
                        }
                        asNewRows.push(UltraEdit.activeDocument.selection.substring(nValueStart,nCharIndex));
                        // Append to array of new rows an empty string to have
                        // an empty row between all the data from one data row.
                        asNewRows.push("");
                        nColumnIndex = 0;
                     }
                     nValueStart = nCharIndex + 1;
                     continue;
                  }
                  else if (nCharCode == 0x22)
                  {
                     bWithinQuotes = true;
                  }
                  bNewDataRow = false;
               }
               else  // Everything enclosed in double quotes must be interpreted
               {     // as part of data value including delimiter and newline
                     // character until next double quote character is found.
                  if (nCharCode == 0x22)
                  {
                     bWithinQuotes = false;
                  }
               }
            }
            while (++nCharIndex < UltraEdit.activeDocument.selection.length);
      
            // Does the file not end with a newline character?
            if (!bNewDataRow)
            {
               if (nColumnIndex < asColumnHeaders.length)
               {
                  asNewRows.push(asColumnHeaders[nColumnIndex]);
               }
               else
               {
                  nColumnIndex++;
                  asNewRows.push("Column "+nColumnIndex.toString(10));
               }
               asNewRows.push(UltraEdit.activeDocument.selection.substring(nValueStart,nCharIndex));
               // Append to array of new rows an empty string to have the
               // last line in file finally also with a line termination.
               asNewRows.push("");
            }
         }
      
         // Determine line terminator type from active file.
         var sLineTerm;
         if (UltraEdit.activeDocument.lineTerminator < 1) sLineTerm = "\r\n";
         else if (UltraEdit.activeDocument.lineTerminator == 1) sLineTerm = "\n";
         else sLineTerm = "\r";
      
         if (bNewFile) // Write the reformatted lines into a new file?
         {
            // Replace the file extension of active file by ".txt".
            // This quick and dirty solution works only for files
            // which really have a file extension.
            var sNewFileName = UltraEdit.activeDocument.path.replace(/\.[^.\\]+$/,".txt");
            // Append .txt if new file name is identical to active file name.
            if (sNewFileName == UltraEdit.activeDocument.path) sNewFileName += ".txt";
      
            // Cancel the selection in input file.
            UltraEdit.activeDocument.top();
      
            // Create a new file with same line ending type as input file.
            UltraEdit.newFile();
            UltraEdit.activeDocument.unixMacToDos();
            if (sLineTerm == "\n") UltraEdit.activeDocument.dosToUnix();
            else if (sLineTerm == "\r") UltraEdit.activeDocument.dosToMac();
      
            UltraEdit.activeDocument.write(asNewRows.join(sLineTerm));
            UltraEdit.saveAs(sNewFileName);
            UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
         }
         else
         {
            // Overwrite the entire data in input file.
            UltraEdit.activeDocument.write(asNewRows.join(sLineTerm));
            // Close file with saving it.
            UltraEdit.closeFile(UltraEdit.activeDocument.path,1);
         }
      }
      
      if (UltraEdit.document.length > 0)  // Is any file opened?
      {
         // Define environment for this script.
         UltraEdit.insertMode();
         if (typeof(UltraEdit.columnModeOff) == "function") UltraEdit.columnModeOff();
         else if (typeof(UltraEdit.activeDocument.columnModeOff) == "function") UltraEdit.activeDocument.columnModeOff();
      
         UltraEdit.activeDocument.selectAll();
         if (UltraEdit.activeDocument.isSel()) ProcessFile();
      }
      
      There is an empty data row inserted between each block of values transformed from a data row with multiple columns into a single column. Please read the comments and remove the line 140 with asNewRows.push(""); responsible for this empty data row according to the comment above if that empty row is not wanted in reformatted (new) file. The line 178 with asNewRows.push(""); with the two comment lines above must be moved done one line in this case to have the last line in file always terminated with a line termination.
      Best regards from an UC/UE/UES for Windows user from Austria