Okay, here is the UltraEdit script for this data alignment task.
Code: Select all
function DataAlign ()
{
// Define the parameters for the case-sensitive Perl regular expression
// Replace all executed on entire file to align the data columns.
// The Perl regular expression finds use also those parameters.
UltraEdit.perlReOn();
UltraEdit.activeDocument.findReplace.mode=0;
UltraEdit.activeDocument.findReplace.matchCase=true;
UltraEdit.activeDocument.findReplace.matchWord=false;
UltraEdit.activeDocument.findReplace.regExp=true;
UltraEdit.activeDocument.findReplace.searchDown=true;
UltraEdit.activeDocument.findReplace.searchInColumn=false;
UltraEdit.activeDocument.findReplace.preserveCase=false;
UltraEdit.activeDocument.findReplace.replaceAll=true;
UltraEdit.activeDocument.findReplace.replaceInAllOpen=false;
if (UltraEdit.activeDocument.findReplace.find("[«»]"))
{
UltraEdit.messageBox("The file contains character " + UltraEdit.activeDocument.selection +
" and cannot be processed because of that character.");
return;
}
// Replace each horizontal tab in entire file by a space using a simple normal
// replace all to make sure having finally really a fixed column width file.
UltraEdit.activeDocument.findReplace.regExp=false;
UltraEdit.activeDocument.findReplace.replace("^t", " ");
// Replace all spaces around all words starting with AM in upper case
// and having one or more digits by « left to the keyword and » right to
// the keyword. The characters « and » should never exist in the file.
UltraEdit.activeDocument.findReplace.regExp=true;
if (!UltraEdit.activeDocument.findReplace.replace("^(AM\\d+) +", "$1»"))
{
UltraEdit.messageBox("The file contains no value matching expression \"^AM\\d+ +\" and is not further processed for that reason.");
return;
}
UltraEdit.activeDocument.findReplace.replace(" +(AM\\d+) +", "«$1»");
// Remove all spaces at end of all lines and append « at end of all lines.
UltraEdit.activeDocument.trimTrailingSpaces();
UltraEdit.activeDocument.findReplace.replace("$", "«");
UltraEdit.activeDocument.findReplace.replace("^«", "");
// Select the first line of the file, get it loaded into memory with
// splitting this line up into substrings using « as separator and
// move the caret back to top of the file which cancels the selection.
UltraEdit.activeDocument.selectLine();
var asDataColumns = UltraEdit.activeDocument.selection.split("«");
UltraEdit.activeDocument.top();
// The last string is an empty string because of « at end of
// first line and must be removed from array for that reason.
asDataColumns.pop();
// Number of characters in each line from beginning of the
// line to beginning of identifier of current data column.
var nCharCount = 0;
// Current column in file at end of the currently
// longest found data value of the current data column.
var nEndDataColumn = 0;
// Current maximum number of characters of the currently
// longest found data value of the current data column.
var nMaxColumnChars = 0;
// Replace string for final data replace to align a data column.
var sFinalDataReplace = "$1 $2 ";
// Process this loop for all data columns in file.
for (var nDataColumn = 0; nDataColumn < asDataColumns.length; nDataColumn++)
{
// Get identifier string for current data column from the array.
// That are the strings AM22», AM23», AM24», etc.
var sIdentifier = asDataColumns[nDataColumn].replace(/^(AM\d+»).*$/,"$1");
var sSearchExp = sIdentifier + "[^«\\r\\n]*«";
// Find in file the longest data value with current identifier.
while (UltraEdit.activeDocument.findReplace.find(sSearchExp))
{
nEndDataColumn = UltraEdit.activeDocument.currentColumnNum;
nMaxColumnChars = nEndDataColumn - nCharCount - sIdentifier.length - 1;
sSearchExp = sIdentifier + "[^«\\r\\n]{" + nMaxColumnChars +",}«";
}
// Create a string consisting just of spaces with the number of
// characters of longest data value with current identifier.
nMaxColumnChars -= 1;
var sSpaces = "";
var nSpaceCount = nMaxColumnChars;
while (nSpaceCount)
{
sSpaces += " ";
nSpaceCount--;
}
// Move caret to top of file and append to all values of
// current data column spaces according to longest data value.
UltraEdit.activeDocument.top();
sSearchExp = "(" + sIdentifier + "[^«\\r\\n]*)«";
UltraEdit.activeDocument.findReplace.replace(sSearchExp, "$1" + sSpaces + "«");
// Truncate all data values of current data column according longest
// data value and replace » and « by a space. On last data column
// replace just » by a space and remove character « at end of line.
sSearchExp = "(" + sIdentifier.substr(0,sIdentifier.length-1) + ")»(.{" + nMaxColumnChars + "}) *?«";
if (nDataColumn == (asDataColumns.length - 1))
{
sFinalDataReplace = sFinalDataReplace.substr(0,sFinalDataReplace.length-1);
}
UltraEdit.activeDocument.findReplace.replace(sSearchExp, sFinalDataReplace);
nCharCount += sIdentifier.length + nMaxColumnChars + 1;
}
}
if (UltraEdit.document.length > 0) // Is any file opened?
{
// Define environment for this script.
UltraEdit.insertMode();
if (typeof(UltraEdit.columnModeOff) == "function") UltraEdit.columnModeOff();
else if (typeof(UltraEdit.activeDocument.columnModeOff) == "function") UltraEdit.activeDocument.columnModeOff();
// Move caret to top of the active file.
UltraEdit.activeDocument.top();
DataAlign();
}
The script was tested with UltraEdit for Windows v22.20 and should work with all UltraEdit versions supporting scripts.
I tested it on provided example data as well as a file with 220,000 lines and a file size greater than 16 MB.
The number of data columns identified by
AMx, with
x being a digit in range 0 to 9 which can occur one or more times after
AM, can vary from one to nearly unlimited. That was tested by adding two more data columns with
AM7 and
AM8 and running the script once on a file containing only
AM1 at beginning of each line.
It is verified that the active file on execution of the script does not contain the character
« or the character
» because of those two characters are used inside the script during execution for data alignment. A message box is displayed if active file contains one of these two characters being selected in this case in the file preventing data alignment by this script.
It is also verified that at least one line starts case-sensitive with
AMx as a minimal protection against running this script on wrong file. It is of course required that all lines start with same
AMx and all lines have same amount of
AMx in same order and no
AMx exists twice on a line. But these requirements on data structure are not verified by the script.