I wrote an UltraEdit script for this task which can be executed only on the active file. I think it is not possible to use
Replace in Files for this task to process all HTML files in a folder without opening one after the other in UltraEdit. The script below can be extended with
GetListOfFiles to process all files of a folder or folder tree by opening a file, running the finds/replaces as in the posted script, saving and closing the file, and repeating that until all files in the list are processed by the extended script.
The UltraEdit script is not very efficient if the active file contains lots of preformatted text sections, i.e. lots of
PRE or
CODE or
KBD elements. It was not possible to use the most efficient method, which is to load the entire HTML file content into memory of the JavaScript interpreter, process it there and output the result into the file replacing everything, which would result in just one undo record and just two or three document window updates. The reason is that the script must be compatible with UltraEdit for Windows v22.20, which cannot correctly load all non-ASCII characters into memory of the JavaScript interpreter. Full (basic multilingual plane) Unicode aware UltraEdit v24.00 or a newer version would be required to process also HTML files with non-ASCII characters completely correctly in memory of the JavaScript interpreter inside of UltraEdit.
I decided to make the script compatible with all versions of UltraEdit for Windows of v13.10 or a newer version. That backward compatibility required a few additional lines in the script code which would not be needed for UE v22.20.
The script below contains the following line twice:
Code: Select all
if (UltraEdit.activeDocument.findReplace.replace("\\n[\\t\\n\\r ]+","\\n")) nBlockModCount++;
If the Perl regular expression engine of the used UltraEdit version supports
\K, I recommend to change both lines to:
Code: Select all
if (UltraEdit.activeDocument.findReplace.replace("\\n\\K[\\t\\n\\r ]+","")) nBlockModCount++;
UltraEdit for Windows v22.20 supports
\K (keep back) as it can be seen by running first a Perl regular expression find with search expression
\n[\t\n\r ]+ and on something found, moving caret upwards to the beginning of the line with line-feed selected at the end of the line and running again a Perl regular expression find with search expression
\n\K[\t\n\r ]+. If the same whitespaces are found again with the difference that the line-feed at the end of the line is not selected this time, the used version of UltraEdit contains a Perl regular expression engine supporting the keep back expression
\K which results in not selecting for the replace everything found by the expression left to
\K in the search expression.
Here is the script code tested with UE v13.10a+2, v22.20.0.49 and v28.00.0.114 on an example HTML file created by myself:
Code: Select all
// Removes line-leading spaces, horizontal tabs, carriage returns and
// line-feeds (and with them blank lines) from the active file, except
// inside PRE, CODE and KBD elements. Only syntax available in the
// JavaScript engine of old UltraEdit versions is used on purpose.
if (UltraEdit.document.length > 0) // Do nothing if no file is opened.
{
   // Prepare the editing environment: insert mode and no column mode.
   // columnModeOff is either a function of the UltraEdit object or of
   // the document object depending on the version of UltraEdit.
   UltraEdit.insertMode();
   if (typeof UltraEdit.columnModeOff === "function")
   {
      UltraEdit.columnModeOff();
   }
   else if (typeof UltraEdit.activeDocument.columnModeOff === "function")
   {
      UltraEdit.activeDocument.columnModeOff();
   }

   // All searches start at the top of the active file.
   UltraEdit.activeDocument.top();

   // Configure the Perl regular expression engine and all find/replace
   // options once. Only mode and selectText are toggled later.
   UltraEdit.perlReOn();
   UltraEdit.activeDocument.findReplace.mode = 0;
   UltraEdit.activeDocument.findReplace.matchCase = false;
   UltraEdit.activeDocument.findReplace.matchWord = false;
   UltraEdit.activeDocument.findReplace.regExp = true;
   UltraEdit.activeDocument.findReplace.searchDown = true;
   // The property searchInColumn does not exist in all versions.
   if (typeof UltraEdit.activeDocument.findReplace.searchInColumn === "boolean")
   {
      UltraEdit.activeDocument.findReplace.searchInColumn = false;
   }
   UltraEdit.activeDocument.findReplace.preserveCase = false;
   UltraEdit.activeDocument.findReplace.replaceAll = true;
   UltraEdit.activeDocument.findReplace.replaceInAllOpen = false;
   UltraEdit.activeDocument.findReplace.selectText = false;

   // currentColumnNum is 1 for the first column starting with UE for
   // Windows v16.00 / UES v10.00, but 0 in former versions. The offset
   // added to every read column number compensates this difference.
   var nColumnOffset = 0;
   if (typeof UltraEdit.activeDocumentIdx === "undefined") nColumnOffset = 1;

   // Number of blocks in which the whitespace replace changed something.
   var nModifiedBlocks = 0;

   // Runs the whitespace deletion on the current selection, if there is
   // one. The replace is limited to the selection by switching the mode
   // to 1 and enabling selectText; both are restored afterwards. Only
   // normal spaces, horizontal tabs, carriage returns and line-feeds
   // after a line-feed are removed, not other Unicode whitespaces which
   // \s would match too.
   var deleteLeadingWhitespaceInSelection = function ()
   {
      if (!UltraEdit.activeDocument.isSel()) return;

      UltraEdit.activeDocument.findReplace.mode = 1;
      UltraEdit.activeDocument.findReplace.selectText = true;
      var bReplaced = UltraEdit.activeDocument.findReplace.replace("\\n[\\t\\n\\r ]+", "\\n");
      UltraEdit.activeDocument.findReplace.selectText = false;
      UltraEdit.activeDocument.findReplace.mode = 0;

      if (bReplaced) nModifiedBlocks++;
   };

   // Loop variables declared once outside of the loop.
   var sOpeningTag = "";
   var sClosingTag = "";
   var nTagColumn = 1;
   var nBlockStartLine = 1;
   var nBlockStartColumn = 1;

   while (true)
   {
      // Remember where the block to clean up begins.
      nBlockStartLine = UltraEdit.activeDocument.currentLineNum;
      nBlockStartColumn = UltraEdit.activeDocument.currentColumnNum + nColumnOffset;

      // Look case-insensitive for the next PRE, CODE or KBD start tag.
      if (!UltraEdit.activeDocument.findReplace.find("<(?:pre|code|kbd)\\b"))
      {
         // No further preformatted element: process the rest of the
         // file, move the caret back to the top and finish.
         UltraEdit.activeDocument.selectToBottom();
         deleteLeadingWhitespaceInSelection();
         UltraEdit.activeDocument.top();
         break;
      }

      // Keep the found start tag exactly as written in the file.
      sOpeningTag = UltraEdit.activeDocument.selection;

      // Determine the column of the opening angle bracket. Very old
      // versions of UltraEdit leave the caret at the beginning of the
      // found string instead of its end. If the character at the caret
      // is not an opening angle bracket, the caret is (most likely) at
      // the end and the tag length must be subtracted.
      nTagColumn = UltraEdit.activeDocument.currentColumnNum + nColumnOffset;
      if (!UltraEdit.activeDocument.isChar("<")) nTagColumn -= sOpeningTag.length;

      // Place the caret at the beginning of the start tag (line 0 is
      // the current line) and select backwards to the block beginning.
      UltraEdit.activeDocument.gotoLine(0, nTagColumn);
      UltraEdit.activeDocument.gotoLineSelect(nBlockStartLine, nBlockStartColumn);
      deleteLeadingWhitespaceInSelection();

      // Jump over the preformatted element by finding its end tag and
      // continue with the next block behind it.
      sClosingTag = "</" + sOpeningTag.substr(1) + ">";
      if (UltraEdit.activeDocument.findReplace.find(sClosingTag)) continue;

      // The end tag is missing: select the start tag again to show it
      // to the user, report the problem and stop processing.
      UltraEdit.activeDocument.findReplace.find(sOpeningTag + "\\b");
      UltraEdit.outputWindow.showWindow(true);
      UltraEdit.outputWindow.write("ERROR: Cannot find the end tag for the selected start tag.");
      UltraEdit.outputWindow.write("       Everything after this start tag is not further processed.");
      break;
   }

   // Write a short summary into the output window without changing
   // the visibility status of the output window.
   var sSummary = "Whitespace deletions done on " + nModifiedBlocks.toString(10) + " block";
   sSummary += (nModifiedBlocks === 1) ? "." : "s.";
   UltraEdit.outputWindow.write(sSummary);
}