Splitting huge file into various files

Splitting huge file into various files

1
NewbieNewbie
1

    Oct 06, 2009#1

    I have a file containing about 2 million lines of this type:

    Code: Select all

    204/23-1                             3878.88             97
    204/23-1                             3879.03             97
    204/23-1                             3879.18             97
    204/23-1                             3879.33             97
    204/23-1                             3879.48             97
    204/23-1                             3879.63             97
    204/23-1                             3879.78             97
    204/23-1                             3879.93             97
    204/24-1A                            0458.63              0
    204/24-1A                            0458.78              0
    204/24-1A                            0458.93              0
    204/24-1A                            0459.08              0
    204/24-1A                            0459.23              0
    204/24-1A                            0459.38              0
    204/24-1A                            0459.53              0
    204/24-1A                            0459.68              0
    I want to split this file into several files, each saved with a name based on the value in the first column. The number of rows per file varies.

    I was thinking of using an underscore as a replacement for the / in the file names.
    It is not that important what is used as long as the files can be easily identified so I don't import the data into the wrong place afterwards.

    6,686585
    Grand MasterGrand Master
    6,686585

      Oct 06, 2009#2

      Here is one solution. It is surely not the fastest or best one, but it worked for your example. It is best to test the script on a small file first and, once it works, run it on the huge file during a longer break or overnight. Please read the comments at the top of the script!

      Code: Select all

      /* Insert here the functions GetFilePath and GetFileExt from
         https://www.ultraedit.com/resources/scripts/FileNameFunctions.js
         or replace the function calls with fixed strings.
         Please note: The path string must end with a backslash and the file
         extension string is without a dot. And you must use 2 backslashes
         for every backslash in the path string like "C:\\Temp\\". */
      
      // Find the document index of the active document. Copied from
      // https://forums.ultraedit.com/viewtopic.php?f=52&t=4571
      /**
       * Look up the position of the active document in UltraEdit.document.
       *
       * Documents are compared by their path property; the first document
       * whose path equals the path of UltraEdit.activeDocument wins.
       *
       * @returns {number} Index of the first matching document, or -1 when
       *                   no document matches (e.g. no document is open).
       */
      function getActiveDocumentIndex () {
         var nFoundIndex = -1;
         var nPosition = 0;
         // Stop scanning as soon as the first matching document is found.
         while (nFoundIndex < 0 && nPosition < UltraEdit.document.length) {
            if (UltraEdit.activeDocument.path == UltraEdit.document[nPosition].path) {
               nFoundIndex = nPosition;
            }
            nPosition++;
         }
         return nFoundIndex;
      }
      
      /**
       * Build the full name of the output file for one block of rows.
       *
       * Every character that is not allowed in a Windows file name
       * (/ : * ? " < > | and the backslash) is replaced by an underscore,
       * so a key like "204/23-1" becomes "204_23-1".
       *
       * @param {string} sPath Target directory, ending with a backslash.
       * @param {string} sKey  Key string taken from the first column.
       * @param {string} sExt  File extension without the leading dot.
       * @returns {string} sPath + sanitized key + "." + sExt
       */
      function buildSplitFileName (sPath, sKey, sExt) {
         return sPath + sKey.replace(/[\/:*?"<>|\\]/g, "_") + "." + sExt;
      }

      // Split the active file into one file per run of consecutive lines
      // starting with the same string (everything up to the first space or
      // tab). Each run is saved as <path of data file><key>.<extension>.
      // NOTE(review): if the same key occurs again after a different key,
      // a second file with the same name is saved - what UltraEdit does
      // then (overwrite or prompt) should be verified. The data is
      // expected to be grouped by key.
      var nDataFileIndex = getActiveDocumentIndex();

      if (nDataFileIndex >= 0) {  // Is any file open?

         var sRow = "";         // Current line including its line termination.
         var sData = "";        // Lines collected for the current key.
         var sField = "";       // Key of the lines collected in sData.
         var sFileName = "";    // Full name of the file to save next.
         var sSuffix = "";      // " file." or " files." for the final message.
         var nFileCount = 0;    // Number of files saved by this script.
         var sFileExt  = GetFileExt(-1);
         var sFilePath = GetFilePath(-1);
         var DataFile = UltraEdit.document[nDataFileIndex];

         // Loose comparison kept on purpose: GetFileExt is defined outside
         // of this script block.
         if (sFileExt == "") sFileExt = "csv";

         // Define the working environment for this script.
         UltraEdit.insertMode();
         UltraEdit.columnModeOff();
         UltraEdit.activeDocument.hexOff();
         UltraEdit.perlReOn();

         // Make sure the last line of the file has a line termination.
         DataFile.bottom();
         if (DataFile.isColNumGt(1)) {
            DataFile.insertLine();
            // Remove whatever the editor inserted on the new line so that
            // the file really ends with an empty line.
            if (DataFile.isColNumGt(1)) {
               DataFile.deleteToStartOfLine();
            }
         }
         DataFile.top();       // Start from top of the file.
         UltraEdit.newFile();  // Open now a new file to avoid display updates.

         // Evaluate the file line by line until cursor reaches end of file.
         while (!DataFile.isEof()) {

            DataFile.selectLine();        // Select the current line.
            sRow = DataFile.selection;    // Get the selection into a variable.
            // Get the string from start of the line to first space or tab.
            var asFields = sRow.match(/^[^ \t\r\n]+/);

            // Blank lines and lines starting with a space or tab produce no
            // match and are ignored.
            if (asFields) {
               if (sField === asFields[0]) {
                  // Same key as the row before: just collect the row.
                  sData += sRow;
               } else {                   // New key string detected.
                  if (sData !== "") {     // Some data already collected?
                     UltraEdit.newFile(); // Write the data into a new file.
                     UltraEdit.activeDocument.write(sData);
                     sFileName = buildSplitFileName(sFilePath, sField, sFileExt);
                     // Save the new file and close it.
                     UltraEdit.saveAs(sFileName);
                     UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
                     nFileCount++;
                  }
                  // Store current key string and row for next data block.
                  sData = sRow;
                  sField = asFields[0];
               }
            }
            DataFile.key("HOME");  // To unselect the selected line.
         }

         DataFile.top();      // Set the cursor back to top of the data file.
         if (sData !== "") {  // Is there a block not already saved into a file?
            // The last block goes into the new file opened before the loop,
            // which is the last document in the document list.
            UltraEdit.document[UltraEdit.document.length-1].setActive();
            UltraEdit.activeDocument.write(sData);
            sFileName = buildSplitFileName(sFilePath, sField, sFileExt);
            UltraEdit.saveAs(sFileName);
            nFileCount++;     // The last block is a saved file too.
         }
         // Close the new file created first (normally with data).
         UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
         sSuffix = (nFileCount === 1) ? " file." : " files.";
         UltraEdit.messageBox("Script saved "+nFileCount+sSuffix);
      }
      Best regards from an UC/UE/UES for Windows user from Austria