It was an honor to write this little script for you. It is not completely tested, but it works for the example.
Please note that the script as is
- works only for small files with some KB or a few MB;
- counts only the modified lines and outputs this number and not the number of replaced words/phrases;
- replaces entire file content when a line is modified instead of just the modified lines and therefore the line change indicator marks all lines as modified;
- searches for the censored words/phrases case-sensitive and replaces them with the case-sensitive words/phrases.
As the word lists most likely contain the words/phrases only in lower case, while the source file could also contain them with the first letter in upper case because of being at the beginning of a sentence, the script duplicates both word lists and changes the case of the first character of each word/phrase in the two additional lists depending on the case of the first character of the censored word/phrase. Every text after the equal sign is checked with both lists. This should result in correct output for the words/phrases at the beginning of a sentence too.
If the source file is large, a completely different method would be better: one which runs regular expression Replace All commands from the top of the source file for every word/phrase pair in the two files. That method is most likely slower and produces lots of undo history entries, unless the undo feature is disabled because the large file was loaded without a temporary file, but the line change indicator would work with this approach.
Code: Select all
// Line terminator used to split loaded file contents into single lines.
var sLineTerminator = "\r\n";
// Full names of the two word list files: the words/phrases to search for
// and the words/phrases to insert instead. The script pairs the entries
// of both files by their line position.
var sCensoredWordsFile = "C:\\Temp\\censoredwords.txt";
var sReplaceWordsFile = "C:\\Temp\\replacewords.txt";
// =========================================================================
function GetFileIndex (sFullNameOfFile) {
   // Returns the document index of the opened file with the passed full
   // name, or -1 if no opened file has this name. When no usable name is
   // passed (nothing, a non-string value or an empty string), the index
   // of the active document is looked up instead.
   var bNoNamePassed = (typeof(sFullNameOfFile) != "string") || !sFullNameOfFile.length;
   if (bNoNamePassed) {
      // UltraEdit v16.00 and later offer the active document index directly.
      if (typeof(UltraEdit.activeDocumentIdx) == "number") {
         return UltraEdit.activeDocumentIdx;
      }
      // Otherwise search for the active document by its full name.
      sFullNameOfFile = UltraEdit.activeDocument.path;
   }

   // File names are compared in lowercase because Windows file systems
   // are not case sensitive. Users of UEX should compare case sensitive
   // and therefore don't need toLowerCase() here and in the loop below.
   var sWantedName = sFullNameOfFile.toLowerCase();

   // Walk through all opened documents from first to last and stop at
   // the first one whose (lowercase) full name matches.
   var nCandidate = 0;
   while (nCandidate < UltraEdit.document.length) {
      var sOpenedName = UltraEdit.document[nCandidate].path.toLowerCase();
      if (sOpenedName == sWantedName) {
         return nCandidate;
      }
      nCandidate++;
   }

   // No opened document has this file name.
   return -1;
}
// =========================================================================
function sortByStringLength (sFirst,sSecond)
{
   // Compare function for Array.sort() producing a descending order.
   // Both parameters are subarrays whose second element (index 1) is the
   // value to compare - the caller stores a string length there.
   // Returns -1 if the first value is greater, 1 if it is smaller and
   // 0 otherwise.
   var nFirstValue = sFirst[1];
   var nSecondValue = sSecond[1];
   return (nFirstValue > nSecondValue) ? -1 : ((nFirstValue < nSecondValue) ? 1 : 0);
}
// =========================================================================
// Main routine: replaces in the active file, in the text after the first
// equal sign of every line, all censored words/phrases by the replace
// words/phrases of same position in the second words list. Each pair is
// applied as written in the lists, with first character changed in case
// and with all letters in upper case. The censored words/phrases are
// searched as literal text and case-sensitive, longest first.
if (UltraEdit.document.length > 0) // Is any file opened?
{
   // Escapes all characters with a special meaning in a regular expression
   // so that a censored word/phrase is searched as literal text.
   var fnEscapeRegExp = function (sText)
   {
      return sText.replace(/[.*+?^${}()|[\]\\]/g,"\\$&");
   };

   // Escapes the dollar sign in a replace string as String.replace()
   // interprets sequences like $& or $1 in the replace string otherwise.
   var fnEscapeReplacement = function (sText)
   {
      return sText.replace(/\$/g,"$$$$");
   };

   // Returns the lines of a words list file as array of strings, or null
   // if the file is not opened and could not be opened by this function.
   // A file opened by this function is closed again without saving.
   var fnLoadWordList = function (sWordsFile)
   {
      var asWords;
      var nWordsDocIndex = GetFileIndex(sWordsFile);
      if (nWordsDocIndex < 0)
      {  // File with the words not yet opened.
         // NOTE(review): assumes the number of documents increases when the
         // file is opened successfully - confirm with the used UltraEdit.
         // Without this check a failed open would result in reading the
         // words from the source file and closing the source file.
         var nDocCountBeforeOpen = UltraEdit.document.length;
         UltraEdit.open(sWordsFile);
         if (UltraEdit.document.length <= nDocCountBeforeOpen) return null;
         UltraEdit.activeDocument.selectAll();
         asWords = UltraEdit.activeDocument.selection.split(sLineTerminator);
         UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
         UltraEdit.document[nSourceDocIndex].setActive();
      }
      else
      {  // File with the words already opened.
         UltraEdit.document[nWordsDocIndex].selectAll();
         asWords = UltraEdit.document[nWordsDocIndex].selection.split(sLineTerminator);
         UltraEdit.document[nWordsDocIndex].top();
      }
      // Remove last string from array if it is an empty string because
      // the last line of the file has also a DOS line termination.
      if (!asWords[asWords.length-1].length) asWords.pop();
      return asWords;
   };

   // Define environment for this script.
   UltraEdit.insertMode();
   UltraEdit.columnModeOff();
   UltraEdit.activeDocument.hexOff();

   // Determine document index of active file - the source file.
   var nSourceDocIndex = GetFileIndex();

   // Load the censored words and the replace words.
   var asCensoredUnsorted = fnLoadWordList(sCensoredWordsFile);
   var asReplaceUnsorted = null;
   if (asCensoredUnsorted !== null) asReplaceUnsorted = fnLoadWordList(sReplaceWordsFile);

   if (asCensoredUnsorted === null)
   {
      UltraEdit.messageBox("Could not open file: " + sCensoredWordsFile);
   }
   else if (asReplaceUnsorted === null)
   {
      UltraEdit.messageBox("Could not open file: " + sReplaceWordsFile);
   }
   else
   {
      // For script stability take the lower value of words count.
      // Actually both word lists should contain same number of strings.
      var nPairCount = Math.min(asCensoredUnsorted.length,asReplaceUnsorted.length);

      // Build an array with index number and string length of censored
      // word/phrase in a subarray for every censored word/phrase. Empty
      // censored strings (blank lines) are skipped as an empty search
      // expression would match at every position in the text.
      var anSortIndex = [];
      for (var nPairIndex = 0; nPairIndex < nPairCount; nPairIndex++)
      {
         var nCensoredLength = asCensoredUnsorted[nPairIndex].length;
         if (nCensoredLength) anSortIndex.push([nPairIndex,nCensoredLength]);
      }

      // Sort this number array based on the string length values. This is
      // for sorting the censored words/phrases from longest to shortest to
      // avoid a replace of a shorter substring first before replacing the
      // longer string containing the substring too. The replace words are
      // taken via the same index as a censored and a replace word build
      // a pair.
      anSortIndex.sort(sortByStringLength);

      // Build once the list of all replaces to run on every line. The
      // regular expression objects do not depend on the processed line
      // and therefore must not be created again for every line.
      var aoReplaceSteps = [];
      var fnAddReplaceStep = function (sSearch,sReplace)
      {
         aoReplaceSteps.push({ rSearch: new RegExp(fnEscapeRegExp(sSearch),"g"),
                               sReplace: fnEscapeReplacement(sReplace) });
      };

      for (var nWordIndex = 0; nWordIndex < anSortIndex.length; nWordIndex++)
      {
         var sCensored = asCensoredUnsorted[anSortIndex[nWordIndex][0]];
         var sReplace = asReplaceUnsorted[anSortIndex[nWordIndex][0]];

         // There could be words to replace at beginning of a sentence in
         // upper case and elsewhere in lower case. The lists most likely
         // contain the words/phrases only in one case (lower case). So
         // create also a variant with first character changed in case.
         var sFirstChar = sCensored.substr(0,1);
         var sUpperChar = sFirstChar.toLocaleUpperCase();
         var sCaseCensored;
         var sCaseReplace;
         if (sUpperChar != sFirstChar)
         {
            // First character is lower case: variant with upper case.
            sCaseCensored = sUpperChar + sCensored.substr(1);
            sCaseReplace = sReplace.substr(0,1).toLocaleUpperCase() + sReplace.substr(1);
         }
         else
         {
            // First character is already upper case: variant with lower case.
            sCaseCensored = sFirstChar.toLocaleLowerCase() + sCensored.substr(1);
            sCaseReplace = sReplace.substr(0,1).toLocaleLowerCase() + sReplace.substr(1);
         }

         // Third variant with all letters in upper case.
         var sUpperCensored = sCensored.toLocaleUpperCase();
         var sUpperReplace = sReplace.toLocaleUpperCase();

         // The word/phrase as written in the lists is always applied. A
         // variant identical to an already added search string of this
         // pair is not added again as running the same search twice could
         // replace text inside an already inserted replace word.
         fnAddReplaceStep(sCensored,sReplace);
         if (sCaseCensored != sCensored) fnAddReplaceStep(sCaseCensored,sCaseReplace);
         if ((sUpperCensored != sCensored) && (sUpperCensored != sCaseCensored))
         {
            fnAddReplaceStep(sUpperCensored,sUpperReplace);
         }
      }

      // Get all lines of source file into an array.
      UltraEdit.activeDocument.selectAll();
      var asSourceLines = UltraEdit.activeDocument.selection.split(sLineTerminator);
      var nLineModCount = 0;

      // Process all lines with an equal sign.
      for (var nLineNum = 0; nLineNum < asSourceLines.length; nLineNum++)
      {
         // Get character position of first equal sign in current line.
         var nEqualSignPos = asSourceLines[nLineNum].indexOf('=');
         // If the line does not contain an equal sign, continue with next line.
         if (nEqualSignPos < 0) continue;

         // Split the line into the fixed part up to and including the
         // equal sign and the text after the equal sign to check.
         var sFixedPart = asSourceLines[nLineNum].substr(0,nEqualSignPos+1);
         var sTextToCheck = asSourceLines[nLineNum].substr(nEqualSignPos+1);
         var sCheckedText = sTextToCheck;

         // Run case-sensitive all replaces on the text to check.
         for (var nStep = 0; nStep < aoReplaceSteps.length; nStep++)
         {
            sCheckedText = sCheckedText.replace(aoReplaceSteps[nStep].rSearch,
                                                aoReplaceSteps[nStep].sReplace);
         }

         // Was indeed anything replaced in the text?
         if (sCheckedText != sTextToCheck)
         {
            nLineModCount++;
            asSourceLines[nLineNum] = sFixedPart + sCheckedText;
         }
      }

      if (nLineModCount) // Was any line modified?
      {
         // Use user clipboard 9 for writing all lines back to the file overwriting
         // the selection as this is faster than using the document write command.
         UltraEdit.selectClipboard(9);
         UltraEdit.clipboardContent = asSourceLines.join(sLineTerminator);
         UltraEdit.activeDocument.paste();
         UltraEdit.clearClipboard();
         UltraEdit.selectClipboard(0);
      }

      // Cancel selection and move caret to top of source file.
      UltraEdit.activeDocument.top();

      // Let the script user know how many lines were modified.
      UltraEdit.messageBox(nLineModCount.toString() + " line" + (nLineModCount!=1 ? "s" : "") + " modified!");
   }
}
Edited on 2013-03-03:
Script updated to sort the two arrays with the words/phrases based on the length of the censored words/phrases. That was trickier than I first thought. I had several ideas and decided to use the one above as it should be fast.
Furthermore, two more arrays with all letters in upper case were added and are applied as well.