How to find non consecutive duplicate lines in a large file?

How to find non consecutive duplicate lines in a large file?

221
Basic UserBasic User
221

    Oct 21, 2018#1

    How to find non consecutive duplicate lines in a large file?

    My file(s) could be rather large, about 60k to 100k lines. I tried it with this regular expression:

    Code: Select all

    (?-s)^(.+)\R(?s)(?=.*\R\1\R?)
    This works fine in Notepad++, but only for a small file. That is the reason why I'm looking for something similar or a workaround.

    I was wondering if UltraEdit would do better in this. But it doesn't work with UltraEdit version 16.30. I stopped updating since I realized how heavy UE became.

    Here is a simple example:

    Code: Select all

    Ant
    Butterfly
    Mosquito
    
    Ant
    Antelope
    Butterfly
    Crocodile
    Elephant
    Giraffe
    Lion
    Mosquito
    Panther
    Zebra
    
    I don't want to remove the duplicates. I only want to see them or mark/bookmark them, that's all. The lines to mark should be Ant, Mosquito & Butterfly (in the first block) and it should work with large files too.

    6,675585
    Grand MasterGrand Master
    6,675585

      Oct 26, 2018#2

      No regular expression find can do that in any application on a large file.

      I wrote for this task first following UltraEdit script and tested it with UltraEdit v16.30 on the small example:

      Code: Select all

      if (UltraEdit.document.length > 0)  // Run only if at least one file is opened.
      {
         // Purpose: list every non empty line of the active file which exists
         // more than once in the file. Each series of identical lines is written
         // to the output window in the format "file path(line number): line".

         // Working environment: insert mode and column mode turned off. The
         // column mode command is called on whichever object offers it.
         UltraEdit.insertMode();
         if (typeof UltraEdit.columnModeOff == "function")
         {
            UltraEdit.columnModeOff();
         }
         else if (typeof UltraEdit.activeDocument.columnModeOff == "function")
         {
            UltraEdit.activeDocument.columnModeOff();
         }

         // Go to end of file. A caret not at column 1 means the last line has
         // no line ending, so one is inserted. Anything put on the new line by
         // the insert is deleted again. The line number at end of file is
         // taken as the total number of lines before going back to top.
         UltraEdit.activeDocument.bottom();
         if (UltraEdit.activeDocument.isColNumGt(1))
         {
            UltraEdit.activeDocument.insertLine();
            if (UltraEdit.activeDocument.isColNumGt(1)) UltraEdit.activeDocument.deleteToStartOfLine();
         }
         var nLastLine = UltraEdit.activeDocument.currentLineNum;
         UltraEdit.activeDocument.top();

         // Nothing to compare if the file has less than two lines.
         if (nLastLine > 2)
         {
            // All copies below go to user clipboard 9.
            UltraEdit.selectClipboard(9);

            // Remember index of the file to process and its full name with
            // an opening parenthesis appended as prefix for all output lines.
            var nSourceDoc = UltraEdit.activeDocumentIdx;
            var sPathPrefix = UltraEdit.activeDocument.path + '(';

            // The many finds would cause many document window refreshes which
            // cost a lot of time. A new file is created to become the active
            // file, hopefully displayed maximized, so that the finds executed
            // in the previously active file need no window refresh.
            UltraEdit.newFile();

            // One flag per line: set once a line was written to output window.
            var abReported = new Array(nLastLine);

            // Parameters for the non regular expression finds run below.
            UltraEdit.ueReOn();
            UltraEdit.document[nSourceDoc].findReplace.mode=0;
            UltraEdit.document[nSourceDoc].findReplace.matchCase=true;
            UltraEdit.document[nSourceDoc].findReplace.matchWord=false;
            UltraEdit.document[nSourceDoc].findReplace.regExp=false;
            UltraEdit.document[nSourceDoc].findReplace.searchDown=true;
            UltraEdit.document[nSourceDoc].findReplace.searchInColumn=false;

            // Empty and hide the output window while collecting the results.
            UltraEdit.outputWindow.clear();
            UltraEdit.outputWindow.showStatus=false;
            UltraEdit.outputWindow.showWindow(false);

            // Each line is copied to user clipboard 9 and searched in the lines
            // below with a normal find. A regular expression find cannot be
            // used as the line could contain characters with a regular
            // expression meaning. A found string could be just the end of a
            // longer line, so the caret column at end of the found line is
            // compared with the caret column at end of the searched line.
            // Lines already reported are skipped to avoid duplicate output.
            var nSeriesCount = 0;     // Number of lines existing more than once.
            var nReportedLines = 0;   // Number of lines written to output window.
            for (var nLine = 1; nLine < nLastLine; nLine++)
            {
               // Process only lines not yet written to the output window.
               if (abReported[nLine] == null)
               {
                  // Set caret to end of the line to search for and remember
                  // the column number as measure for the length of the line.
                  UltraEdit.document[nSourceDoc].gotoLine(nLine,1);
                  UltraEdit.document[nSourceDoc].key("END");
                  var nEndColumn = UltraEdit.document[nSourceDoc].currentColumnNum;

                  // A caret still at column 1 means an empty line to ignore.
                  if (!UltraEdit.document[nSourceDoc].isColNum(1))
                  {
                     // Copy the entire line to active user clipboard 9.
                     UltraEdit.document[nSourceDoc].selectLine();
                     UltraEdit.document[nSourceDoc].copy();

                     // Remains empty until the first duplicate is found.
                     var sLineSuffix = "";
                     while (UltraEdit.document[nSourceDoc].findReplace.find("^c"))
                     {
                        // Set caret to end of the found line.
                        UltraEdit.document[nSourceDoc].key("UP ARROW");
                        UltraEdit.document[nSourceDoc].key("END");

                        // Only a line with same end column is a real duplicate.
                        if (UltraEdit.document[nSourceDoc].currentColumnNum == nEndColumn)
                        {
                           // First duplicate of this line: output the searched
                           // line itself before the found duplicate.
                           if (sLineSuffix.length == 0)
                           {
                              // Separate the series of duplicate lines with an
                              // empty line, except before the first series.
                              if (nSeriesCount) UltraEdit.outputWindow.write("");
                              // Line text from clipboard without line ending
                              // characters, preceded by closing parenthesis,
                              // colon and space.
                              sLineSuffix = "): " + UltraEdit.clipboardContent.replace(/[\r\n]+/,"");
                              UltraEdit.outputWindow.write(sPathPrefix + nLine + sLineSuffix);
                              nSeriesCount++;
                              nReportedLines++;
                           }

                           // Output the duplicate in same format and flag its
                           // line number as already written to output window.
                           var nFoundLine = UltraEdit.document[nSourceDoc].currentLineNum;
                           UltraEdit.outputWindow.write(sPathPrefix + nFoundLine + sLineSuffix);
                           abReported[nFoundLine] = true;
                           nReportedLines++;
                        }
                     }
                  }
               }
            }

            // Empty user clipboard 9 and switch back to system clipboard.
            UltraEdit.clearClipboard(9);
            UltraEdit.selectClipboard(0);

            // Show the output window and append the summary line to it.
            UltraEdit.outputWindow.showWindow(true);
            var sSummary = "Found " + nSeriesCount + " line";
            if (nSeriesCount != 1) sSummary += "s";
            sSummary += " existing more than once with in total " + nReportedLines + " duplicate lines.";
            UltraEdit.outputWindow.write("");
            UltraEdit.outputWindow.write(sSummary);

            // Set caret in processed file back to top and close the new
            // file (the active file) without saving it.
            UltraEdit.document[nSourceDoc].top();
            UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
         }
      }
      
      This script does not modify the active file, except when the last line in the file has no line ending, in which case the missing line ending is inserted.

      The output window displayed for this small test file:

      Code: Select all

      C:\Temp\SmallTestFile.tmp(3): Ant
      C:\Temp\SmallTestFile.tmp(12): Ant
      
      C:\Temp\SmallTestFile.tmp(6): Mosquito
      C:\Temp\SmallTestFile.tmp(13): Mosquito
      
      C:\Temp\SmallTestFile.tmp(9): Butterfly
      C:\Temp\SmallTestFile.tmp(14): Butterfly
      
      Found 3 lines existing more than once with in total 6 duplicate lines.
      
      Now it was possible to use the Ctrl+Shift+Down Arrow and Ctrl+Shift+Up Arrow from within document window to set caret in document window to next/previous line as listed in output window.

      Next I created a large file with the few lines in upper block of example at top, many other lines not containing any duplicate line (inserted incrementing number at beginning of each line) and the three duplicate lines at bottom. The file had 194,911 lines and a file size of 11,217,012 bytes. I started the script and canceled execution after 10 minutes on seeing in status bar that just line 1238 was reached after 10 minutes.

      Well, it was clear for me from the beginning that searching for non consecutive duplicate lines by taking a line and searching if any line below is a real duplicate can take many minutes because lots of bytes must be compared again and again. But the very slow progress was definitely caused by the multiple caret movements in file in background which additionally cause lots of status bar refreshes. Window refreshes while processing a lot of data always make such processes extremely slow.

      So I decided to change the approach a little bit by inserting a marker string at beginning of each line to be able to search for entire lines with avoiding finding lines ending by chance with same string as an entire line with the assumption that the marker string is not present in the file at all.

      Code: Select all

      if (UltraEdit.document.length > 0)  // Run only if at least one file is opened.
      {
         // Purpose: list every non empty line of the active file which exists
         // more than once in the file. Each series of identical lines is written
         // to the output window in the format "file path(line number): line".
         // The marker string #!# is inserted temporarily at beginning of each
         // non empty line and must not exist anywhere else in the file.

         // Working environment: insert mode and column mode turned off. The
         // column mode command is called on whichever object offers it.
         UltraEdit.insertMode();
         if (typeof UltraEdit.columnModeOff == "function")
         {
            UltraEdit.columnModeOff();
         }
         else if (typeof UltraEdit.activeDocument.columnModeOff == "function")
         {
            UltraEdit.activeDocument.columnModeOff();
         }

         // Go to end of file. A caret not at column 1 means the last line has
         // no line ending, so one is inserted. Anything put on the new line by
         // the insert is deleted again. The line number at end of file is
         // taken as the total number of lines before going back to top.
         UltraEdit.activeDocument.bottom();
         if (UltraEdit.activeDocument.isColNumGt(1))
         {
            UltraEdit.activeDocument.insertLine();
            if (UltraEdit.activeDocument.isColNumGt(1)) UltraEdit.activeDocument.deleteToStartOfLine();
         }
         var nLastLine = UltraEdit.activeDocument.currentLineNum;
         UltraEdit.activeDocument.top();

         // Nothing to compare if the file has less than two lines.
         if (nLastLine > 2)
         {
            // All copies below go to user clipboard 9.
            UltraEdit.selectClipboard(9);

            // Remember index of the file to process and its full name with
            // an opening parenthesis appended as prefix for all output lines.
            var nSourceDoc = UltraEdit.activeDocumentIdx;
            var sPathPrefix = UltraEdit.activeDocument.path + '(';

            // The many finds would cause many document window refreshes which
            // cost a lot of time. A new file is created to become the active
            // file, hopefully displayed maximized, so that the finds executed
            // in the previously active file need no window refresh.
            UltraEdit.newFile();

            // One flag per line: set once a line was written to output window.
            var abReported = new Array(nLastLine);

            // Run an UltraEdit regular expression replace all which inserts
            // the marker string at beginning of every non empty line.
            UltraEdit.ueReOn();
            UltraEdit.document[nSourceDoc].findReplace.mode=0;
            UltraEdit.document[nSourceDoc].findReplace.matchCase=true;
            UltraEdit.document[nSourceDoc].findReplace.matchWord=false;
            UltraEdit.document[nSourceDoc].findReplace.regExp=true;
            UltraEdit.document[nSourceDoc].findReplace.searchDown=true;
            UltraEdit.document[nSourceDoc].findReplace.searchInColumn=false;
            UltraEdit.document[nSourceDoc].findReplace.preserveCase=false;
            UltraEdit.document[nSourceDoc].findReplace.replaceAll=true;
            UltraEdit.document[nSourceDoc].findReplace.replaceInAllOpen=false;
            UltraEdit.document[nSourceDoc].findReplace.replace("%^(?^)","#!#^1");

            // The finds below are non regular expression finds.
            UltraEdit.document[nSourceDoc].findReplace.regExp=false;

            // Empty and hide the output window while collecting the results.
            UltraEdit.outputWindow.clear();
            UltraEdit.outputWindow.showStatus=false;
            UltraEdit.outputWindow.showWindow(false);

            // Each line is copied to user clipboard 9 and searched in the lines
            // below with a normal find. A regular expression find cannot be
            // used as the line could contain characters with a regular
            // expression meaning. The marker string at beginning of each non
            // empty line works as beginning of line anchor, so a found string
            // is an entire line as long as the marker string hopefully does
            // not exist anywhere else in a line. Lines already reported are
            // skipped to avoid duplicate output.
            var nSeriesCount = 0;     // Number of lines existing more than once.
            var nReportedLines = 0;   // Number of lines written to output window.
            for (var nLine = 1; nLine < nLastLine; nLine++)
            {
               // Process only lines not yet written to the output window.
               if (abReported[nLine] == null)
               {
                  // Set caret to beginning of the line to search for.
                  UltraEdit.document[nSourceDoc].gotoLine(nLine,1);

                  // A line not starting with # of marker string #!# is an
                  // empty line which is ignored.
                  if (UltraEdit.document[nSourceDoc].isChar("#"))
                  {
                     // Copy the entire line to active user clipboard 9.
                     UltraEdit.document[nSourceDoc].selectLine();
                     UltraEdit.document[nSourceDoc].copy();

                     // Remains empty until the first duplicate is found.
                     var sLineSuffix = "";
                     while (UltraEdit.document[nSourceDoc].findReplace.find("^c"))
                     {
                        // First duplicate of this line: output the searched
                        // line itself before the found duplicate.
                        if (sLineSuffix.length == 0)
                        {
                           // Separate the series of duplicate lines with an
                           // empty line, except before the first series.
                           if (nSeriesCount) UltraEdit.outputWindow.write("");
                           // Line text from clipboard without the three
                           // characters of the marker string and without line
                           // ending characters, preceded by closing
                           // parenthesis, colon and space.
                           sLineSuffix = "): " + UltraEdit.clipboardContent.substr(3).replace(/[\r\n]+/,"");
                           UltraEdit.outputWindow.write(sPathPrefix + nLine + sLineSuffix);
                           nSeriesCount++;
                           nReportedLines++;
                        }

                        // Output the duplicate in same format and flag its
                        // line number as already written to output window.
                        var nFoundLine = UltraEdit.document[nSourceDoc].currentLineNum;
                        UltraEdit.outputWindow.write(sPathPrefix + nFoundLine + sLineSuffix);
                        abReported[nFoundLine] = true;
                        nReportedLines++;
                     }
                  }
               }
            }

            // Empty user clipboard 9 and switch back to system clipboard.
            UltraEdit.clearClipboard(9);
            UltraEdit.selectClipboard(0);

            // Show the output window and append the summary line to it.
            UltraEdit.outputWindow.showWindow(true);
            var sSummary = "Found " + nSeriesCount + " line";
            if (nSeriesCount != 1) sSummary += "s";
            sSummary += " existing more than once with in total " + nReportedLines + " duplicate lines.";
            UltraEdit.outputWindow.write("");
            UltraEdit.outputWindow.write(sSummary);

            // Set caret in processed file back to top and remove the marker
            // strings from beginning of all lines with a replace all.
            UltraEdit.document[nSourceDoc].top();
            UltraEdit.document[nSourceDoc].findReplace.regExp=true;
            UltraEdit.document[nSourceDoc].findReplace.replace("%#!#","");

            // Close the new file (the active file) without saving it.
            UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
         }
      }
      
      I tested it first on the small example file and it produced the same output in output window as first script. So I let it run on the large file and looked on status bar to see the progress. It was faster and so I decided to let it run. But after nearly two hours I canceled again the script as it has processed only the first 39,835 lines up to this moment.

      A text editor like UltraEdit even with scripting support is the wrong application for this task.

      It would be a trivial task for me as C/C++ programmer as my main job to write a small C++ executable which opens the file, reads in a loop one line after the other from the file, calculates for each line a hash value, looks up the hash value in current hash table which is very fast to find out if the current line is a duplicate of a previous line, output the line in case of being a duplicate, or add the hash value of current line to hash table on being unique up to now, before continuation with next line from file, until all lines of file have been processed by the executable. I am quite sure the code for such an executable written in C++ with the usage of a library for hash calculation and hash table lookup would not be longer than about 25-75 lines. And the executable would process the large test file definitely in just some seconds for producing the same output as the scripts above. The output could be captured by UltraEdit to output window for usage in UltraEdit with Ctrl+Shift+Up/Down Arrow. But this is an UltraEdit user-to-user forum and not a C++ coding forum. Therefore I don't write the C++ code for you and post it here. You would also need the compiler and the library I would have used to be able to compile the code to an executable.
      Best regards from an UC/UE/UES for Windows user from Austria

      221
      Basic UserBasic User
      221

        Oct 26, 2018#3

        Thanks for your time and your effort.

        Here is another alternative.

        I did that with Excel and vlookup (one column for the original entries and the other for the entries to be searched in the first) and output of the function in the third column, which is very fast and working great. I thought there was an alternative to use in an editor like UltraEdit.

        6,675585
        Grand MasterGrand Master
        6,675585

          Oct 26, 2018#4

          Well, if you are interested in finding lines in a large file of just a small set of well known lines like 20 lines, UltraEdit would be also very fast to find them in the large file. A single Perl regular expression find could be used most likely for that task if the lines are short. But you asked for finding any non consecutive duplicate lines in large file and not just a well known small set of lines. I posted several macro and script solutions for a task like searching in file A for lines or strings listed in file B, the last one just six days ago, see Need help comparing two files and outputting to a new file the matching items.
          Best regards from an UC/UE/UES for Windows user from Austria

          221
          Basic UserBasic User
          221

            Oct 26, 2018#5

            Actually the lines I want to find are a bit less than 1000, and yes, they are known beforehand, I'll look at the link too, thanks.

            No, the script in referenced link doesn't work for me either.

            Question:Why don't we take advantage of the sort?

            I mean the search should finish when word2 > word1, when a word is after that alphabetically, and continue to the next word.

            6,675585
            Grand MasterGrand Master
            6,675585

              Oct 27, 2018#6

              The script posted in referenced topic does definitely not work for your task without adaptation. Read the requirements for the linked script and you understand why.

              You wrote initially about lines and posted an example containing only one word per line. It is unclear for me if your real file contains really only one word per line and of which characters these words can consist of, i.e. the valid character class for the word on each line. For that reason it is still unknown for me if a regular expression find could be used or not which makes a big difference. As long as a regular expression find is not possible, it is not possible to interpret the byte stream of the text file as list of lines with one word per line. So the scripts posted above search for a series of bytes (= string) which can occur anywhere in byte stream of file (= file content) which is one reason why the search for duplicate lines is so slow.

              The questioner of referenced topic uploaded two files with real data which was very important on developing the script.

              I could see in file NPIL.csv that there are integer numbers with 10 digits and other integer numbers with 11 digits. This was important to know as it means I needed to add the regular expression \b to make sure that for example a line in file NPIListing.csv starting with 10638310063, is not found on searching for a line with integer number 1063831006 read from file NPIL.csv in first data column of file NPIListing.csv. I could have used on search also , instead of \b, but I wanted to write the script independent of the separator used in CSV file NPIListing.csv to make it not more restrictive than really necessary.

              Further I could see on looking on NPIL.csv that there are also values like 7.601E+12. I am quite sure this should be 64-bit integer numbers with 12 digits, but Microsoft Excel, or whatever application was used to create the CSV file, interpreted the 12 digit integer number as floating point number and so the CSV file contains this floating point number which the script has to process. The character . means in a Perl regular expression find string: any character except newline characters. The character + means in a Perl regular expression find string: previous character or expression one or more times. But the goal was to search for the string 7.601E+12 and not for a string starting with 7, any other character except a newline character, next the string 601, then character E one or more times and ending finally with the two characters 12 which would match for example 7x601EEE12, but not 7.601E+12. For that reason the script file contains the line sFind += asNumbers[nNumber].replace(/([.+])/g,"\\$1"); to escape each . and + with a backslash to search for 7\.601E\+12 which really matches only the string 7.601E+12.

              A script designed for general tasks can be never optimized on speed. Optimization is only possible on knowing exactly the task requirements and which data to process.
              Best regards from an UC/UE/UES for Windows user from Austria

              221
              Basic UserBasic User
              221

                Oct 27, 2018#7

                Okay, I understand.

                Well, here's one of the files I'm working on. The first 914 lines are my "white list" and I would like to be able to find which ones (of those in white list) are included in the rest of the file (from line 916 to 54760).
                spark (1.07 MiB)   23

                6,675585
                Grand MasterGrand Master
                6,675585

                  Oct 28, 2018#8

                  You could have saved both of us a lot of time on explaining the task better from the beginning with real data.

                  The real task requirements are:
                  1. A text file should be processed with a file size about 1 to 2 MiB.
                  2. The text file contains only ASCII characters, no Unicode characters.
                  3. The text file contains about 60,000 to 100,000 lines.
                  4. Each line of the text file contains a domain URL.
                    So the lines are short and contain dots, hyphens and other characters valid for a URL.
                  5. The text file contains two blocks separated by a single blank line.
                  6. The upper block above the blank line contains line by line the URLs to look up in the lower block below the blank line.
                  7. The upper block can have 1 to about 2,000 lines respectively URLs. The lower block is much larger and contains thousands of URLs.
                  8. Both blocks are alphabetically sorted with no duplicates in upper block and no duplicates in lower block.
                  9. A macro/script compatible with UltraEdit v16.30 should search in lower block for each line listed in upper block and report to a new file or output window which lines in upper block exist also in lower block.
                  The script below is written for that task. It was tested with UltraEdit v16.30.

                  It is optimized for small files which can be loaded completely into memory to avoid window refreshes during script execution. A file with not more than some MiB can be usually loaded by a script into memory as long as string copies are avoided during script execution.

                  The script is also optimized on searching in lower block for a line in upper block regarding to sorted lines in both blocks. The script would produce a wrong result if the lines/URLs in both blocks are not sorted correctly as expected. For that reason the script quickly checks on sorted lines in both blocks during execution and aborts if a line is found of which first character has a lower code value than the first character on previous line in same block. The lines must not be perfectly sorted because this is not the case in example file. But series of lines starting with 0 (decimal code value 48) must be before a series of lines starting with a (decimal code value 97) which must be before a series of lines starting with b (decimal code value 98) and so on.

                  The script reports twice a sort error on execution on example file. The first one on line 10323 with _autodiscover._tcp.agkn.com, and after fixing this line (by removing the underscores), the second one on line 58 with <Rule>api.pushover.net after two leading tabs.

                  Code: Select all

                  function Lookup()
                  {
                     var sLineTerm;
                     if (UltraEdit.activeDocument.lineTerminator < 1) sLineTerm = "^p^p";
                     else if (UltraEdit.activeDocument.lineTerminator == 2) sLineTerm = "^n^n";
                     else sLineTerm = "^r^r";
                  
                     // Define the non regular expression Find parameters to find
                     // the blank line between the two blocks in the active file.
                     UltraEdit.ueReOn();
                     UltraEdit.activeDocument.findReplace.mode=0;
                     UltraEdit.activeDocument.findReplace.matchCase=true;
                     UltraEdit.activeDocument.findReplace.matchWord=false;
                     UltraEdit.activeDocument.findReplace.regExp=false;
                     UltraEdit.activeDocument.findReplace.searchDown=true;
                     UltraEdit.activeDocument.findReplace.searchInColumn=false;
                  
                     // Prepare the active output window for the find results
                     // or the error messages in case of an error is detected.
                     UltraEdit.outputWindow.clear();
                     UltraEdit.outputWindow.showWindow(false);
                  
                     // Find first blank line in active file.
                     if (!UltraEdit.activeDocument.findReplace.find(sLineTerm))
                     {
                        UltraEdit.outputWindow.write("ERROR: No block found at top of file which is terminated with an empty line.");
                        UltraEdit.outputWindow.write("");
                        UltraEdit.outputWindow.write("Script execution aborted.");
                        UltraEdit.outputWindow.showWindow(true);
                        UltraEdit.outputWindow.showStatus=false;
                        return;
                     }
                  
                     // Load this block into memory into an array of strings
                     // with each string being a line without line termination.
                     if (UltraEdit.activeDocument.lineTerminator < 1) sLineTerm = "\r\n";
                     else if (UltraEdit.activeDocument.lineTerminator == 2) sLineTerm = "\n";
                     else sLineTerm = "\r";
                  
                     UltraEdit.activeDocument.selectToTop();
                     var asLinesToFind = UltraEdit.activeDocument.selection.split(sLineTerm);
                     if (!asLinesToFind[asLinesToFind.length-1].length) asLinesToFind.pop();
                     if (!asLinesToFind[asLinesToFind.length-1].length) asLinesToFind.pop();
                  
                     // The number of strings in array incremented by two is the
                     // line number of the first line of the remaining lines in file.
                     var nLineNumberStart = asLinesToFind.length + 2;
                  
                     // Cancel the existing selection and set caret to beginning of the
                     // block after first blank line and select everything to end of file.
                     UltraEdit.activeDocument.gotoLine(nLineNumberStart,1);
                     UltraEdit.activeDocument.selectToBottom();
                  
                     // Is there nothing selected?
                     if (!UltraEdit.activeDocument.isSel())
                     {
                        UltraEdit.outputWindow.write("ERROR: No block found to look up for the lines above.");
                        UltraEdit.outputWindow.write("");
                        UltraEdit.outputWindow.write("Script execution aborted.");
                        UltraEdit.outputWindow.showWindow(true);
                        UltraEdit.outputWindow.showStatus=false;
                        return;
                     }
                  
                     // Load all those lines into memory into another array of strings.
                     var asLinesToLookup = UltraEdit.activeDocument.selection.split(sLineTerm);
                  
                     // Remove the last string in array if this is an empty string
                     // because of last line in file has also a line termination.
                     if (!asLinesToLookup[asLinesToLookup.length-1].length) asLinesToLookup.pop();
                  
                     // Cancel the selection and move caret back to top of file.
                     UltraEdit.activeDocument.top();
                  
                     var nFirstLine = 0;
                     var nArrayIndex = 0;
                     var nCurrentCharCode;
                     var anCodeLineNumbers = [];
                     var nLastCharCode = asLinesToLookup[0].charCodeAt(0);
                     var sFileName = UltraEdit.activeDocument.path + "(";
                  
                     // Create an array for each character code of first character of the
                     // lines to look up with: character code, first line number having this
                     // character as first character and line number of the first line NOT
                     // having this character as first character. Verify during creation
                     // of this special array that the lines are really correct sorted.
                     for (var nLine = 1; nLine < asLinesToLookup.length; nLine++)
                     {
                        nCurrentCharCode = asLinesToLookup[nLine].charCodeAt(0);
                        if (nCurrentCharCode > nLastCharCode)
                        {
                           anCodeLineNumbers[nArrayIndex] = [nLastCharCode, nFirstLine, nLine];
                           nFirstLine = nLine;
                           nLastCharCode = nCurrentCharCode;
                           nArrayIndex++;
                        }
                        else if (nCurrentCharCode < nLastCharCode)
                        {
                           var nWrongLine = nLine + nLineNumberStart;
                           var nPrevLine = nWrongLine - 1;
                           UltraEdit.outputWindow.write("ERROR: First character of line " + nWrongLine +
                                                        " has a lower code value than first character of line " +
                                                        nPrevLine + ".");
                           UltraEdit.outputWindow.write("");
                           UltraEdit.outputWindow.write(sFileName + nPrevLine + "): " + asLinesToLookup[nLine-1]);
                           UltraEdit.outputWindow.write(sFileName + nWrongLine + "): " + asLinesToLookup[nLine]);
                           UltraEdit.outputWindow.write("");
                           UltraEdit.outputWindow.write("The lines to are not correct sorted in the file.");
                           UltraEdit.outputWindow.write("Script execution aborted.");
                           UltraEdit.outputWindow.showWindow(true);
                           UltraEdit.outputWindow.showStatus=false;
                           return;
                        }
                     }
                     anCodeLineNumbers[nArrayIndex] = [nLastCharCode, nFirstLine, nLine];
                  
                     nArrayIndex = 0;
                     nLastCharCode = -1;
                     var nDuplicates = 0;
                  
                     // Run this loop for each line to find in the appropriate block
                     // of remaining lines in file starting with same first character.
                     for (var nLine = 0; nLine < asLinesToFind.length; nLine++)
                     {
                        // Get code value of first character of line to find.
                        nCurrentCharCode = asLinesToFind[nLine].charCodeAt(0);
                  
                        // Is the character code of first character of this line lower
                        // than the character code of first character of previous line?
                        if (nCurrentCharCode < nLastCharCode)
                        {
                           var nWrongLine = nLine + 1;
                           UltraEdit.outputWindow.clear();
                           UltraEdit.outputWindow.write("ERROR: First character of line " + nWrongLine +
                                                        " has a lower code value than first character of line " +
                                                        nLine + ".");
                           UltraEdit.outputWindow.write("");
                           UltraEdit.outputWindow.write(sFileName + nLine + "): " + asLinesToFind[nLine-1]);
                           UltraEdit.outputWindow.write(sFileName + nWrongLine + "): " + asLinesToFind[nLine]);
                           UltraEdit.outputWindow.write("");
                           UltraEdit.outputWindow.write("The lines to are not correct sorted in the file.");
                           UltraEdit.outputWindow.write("Script execution aborted.");
                           UltraEdit.outputWindow.showWindow(true);
                           UltraEdit.outputWindow.showStatus=false;
                           return;
                        }
                        nLastCharCode = nCurrentCharCode;
                  
                        while (nArrayIndex < anCodeLineNumbers.length)
                        {
                           // Has the next block in lines to look up as first character
                           // a character with a code value lower than first character
                           // of the current line to find?
                           if (anCodeLineNumbers[nArrayIndex][0] < nCurrentCharCode)
                           {
                              nArrayIndex++; // Skip all look up lines with a first character
                              continue;      // no line to find has as first character.
                           }
                           // Has the next block in lines to look up as first character
                           // a character with a code value greater than first character
                           // of the current line to find?
                           if (anCodeLineNumbers[nArrayIndex][0] > nCurrentCharCode)
                           {
                              break;         // Skip all lines to find with a first character
                           }                 // no line to look up has as first character.
                  
                           // There is at least one line in the lines to look up with first
                           // character being equal the first character of the line to find.
                           var nLookupLine = anCodeLineNumbers[nArrayIndex][1];
                           var nBreakLine = anCodeLineNumbers[nArrayIndex][2];
                           var sLineToFind = asLinesToFind[nLine];
                  
                           do // Compare case-sensitive the line to find in block of lines
                           {  // to look up starting with same character as the line to find.
                              if (sLineToFind == asLinesToLookup[nLookupLine])
                              {
                                 var nLineToFind = nLine + 1;
                                 var nLineFound = nLookupLine + nLineNumberStart;
                                 // Insert an empty line before a new series of duplicate
                                 // lines except on first series of duplicate lines.
                                 if (nDuplicates) UltraEdit.outputWindow.write("");
                                 // Output the searched line and the found line with line nubmers.
                                 UltraEdit.outputWindow.write(sFileName + nLineToFind + "): " + sLineToFind);
                                 UltraEdit.outputWindow.write(sFileName + nLineFound + "): " + asLinesToLookup[nLookupLine]);
                                 nDuplicates++;
                                 break;
                              }
                           }
                           while((++nLookupLine) < nBreakLine);
                           break;
                        }
                     }
                  
                     // Output a summary information at bottom of output window.
                     var sPluralS = (nDuplicates != 1) ? "s." : ".";
                     if (nDuplicates) UltraEdit.outputWindow.write("");
                     UltraEdit.outputWindow.write("Found " + nDuplicates + " duplicate" + sPluralS);
                     UltraEdit.outputWindow.showWindow(true);
                     UltraEdit.outputWindow.showStatus=false;
                  }
                  
                  // Script entry point: do nothing unless at least one file is opened.
                  if (UltraEdit.document.length > 0)
                  {
                     // Set up the editing environment expected by Lookup():
                     // insert mode on and column mode off.
                     UltraEdit.insertMode();
                     // columnModeOff is looked up first on the application object and
                     // otherwise on the active document; it is called where it exists.
                     if (typeof(UltraEdit.columnModeOff) != "function")
                     {
                        if (typeof(UltraEdit.activeDocument.columnModeOff) == "function")
                        {
                           UltraEdit.activeDocument.columnModeOff();
                        }
                     }
                     else
                     {
                        UltraEdit.columnModeOff();
                     }
                     // Start the lookup with the caret at top of the active file.
                     UltraEdit.activeDocument.top();
                     Lookup();
                  }
                  
                  Running the script on the (fixed) example file produces the following output within one second:

                  Code: Select all

                  C:\Temp\spark(69): appbrain.com
                  C:\Temp\spark(9534): appbrain.com
                  
                  C:\Temp\spark(137): cdn.syndication.twitter.com
                  C:\Temp\spark(13760): cdn.syndication.twitter.com
                  
                  C:\Temp\spark(527): pipe.skype.com
                  C:\Temp\spark(34423): pipe.skype.com
                  
                  C:\Temp\spark(557): quantcast.mgr.consensu.org
                  C:\Temp\spark(36462): quantcast.mgr.consensu.org
                  
                  C:\Temp\spark(701): tinypic.com
                  C:\Temp\spark(43769): tinypic.com
                  
                  C:\Temp\spark(819): www.googletagmanager.com
                  C:\Temp\spark(50908): www.googletagmanager.com
                  
                  C:\Temp\spark(820): www.googletagservices.com
                  C:\Temp\spark(50910): www.googletagservices.com
                  
                  Found 7 duplicates.
                  
                  Best regards from an UC/UE/UES for Windows user from Austria

                  221
                  Basic UserBasic User
                  221

                    Oct 28, 2018#9

                    Yes, it works absolutely great!
                    My apologies for not explaining earlier, thanks a lot!

                    Edit:

                    I think I also need a "preparation" step beforehand, which:
                    • deletes comments (starting with # - start or end of line),
                    • removes all the "0.0.0.0*" or "127.0.0.1*" where * can be a space, a tab, or more from each,
                    • sorts the two blocks (or at least the second),
                    • converts all to lowercase,
                    • converts line terminators to DOS and saves the file in DOS format (ANSI/ASCII).
                    Please could you add those to the script?

                    6,675585
                    Grand MasterGrand Master
                    6,675585

                      Oct 29, 2018#10

                      Here is the script with file preparation before lookup.

                      Code: Select all

                      // Prepares the active file and then reports every line of the first block
                      // (lines above the first blank line) which exists also in the second block
                      // (lines below the first blank line). The results are written to the output
                      // window as pairs of "file(line): text" lines followed by a summary line.
                      //
                      // File preparation: a line termination is appended to the last line if
                      // missing, trailing spaces/tabs are removed, line endings are converted to
                      // DOS, comments starting with # are removed (entire comment lines as well
                      // as comments at end of a line), a leading 0.0.0.0 or 127.0.0.1 is cut off
                      // from the lines, everything is converted to lower case, both blocks are
                      // sorted with removing duplicates and the file is saved if it has a name.
                      //
                      // The function takes no parameters and returns nothing. On an error a
                      // message is written to the output window and the function returns early.
                      function PrepareAndLookup()
                      {
                         // Write an error message and the abort notice to the output window
                         // and show the window. The caller has to return after calling this.
                         function reportAbort(sMessage)
                         {
                            UltraEdit.outputWindow.write(sMessage);
                            UltraEdit.outputWindow.write("");
                            UltraEdit.outputWindow.write("Script execution aborted.");
                            UltraEdit.outputWindow.showWindow(true);
                            UltraEdit.outputWindow.showStatus=false;
                         }
                      
                         // Search string for a blank line according to the line terminator
                         // type of the active file as the file is not yet converted to DOS.
                         var sLineTerm;
                         if (UltraEdit.activeDocument.lineTerminator < 1) sLineTerm = "^p^p";
                         else if (UltraEdit.activeDocument.lineTerminator == 2) sLineTerm = "^n^n";
                         else sLineTerm = "^r^r";
                      
                         // Define the non regular expression Find parameters to find
                         // the blank line between the two blocks in the active file.
                         UltraEdit.ueReOn();
                         UltraEdit.activeDocument.findReplace.mode=0;
                         UltraEdit.activeDocument.findReplace.matchCase=true;
                         UltraEdit.activeDocument.findReplace.matchWord=false;
                         UltraEdit.activeDocument.findReplace.regExp=false;
                         UltraEdit.activeDocument.findReplace.searchDown=true;
                         UltraEdit.activeDocument.findReplace.searchInColumn=false;
                      
                         // Prepare the active output window for the find results
                         // or the error messages in case of an error is detected.
                         UltraEdit.outputWindow.clear();
                         UltraEdit.outputWindow.showWindow(false);
                      
                         // Find first blank line in active file.
                         if (!UltraEdit.activeDocument.findReplace.find(sLineTerm))
                         {
                            reportAbort("ERROR: No block found at top of file which is terminated with an empty line.");
                            return;
                         }
                      
                      // --- File preparation start ----------------------------------------------
                      
                         // Make sure the last line of the file has a line termination.
                         UltraEdit.activeDocument.bottom();
                         if (UltraEdit.activeDocument.isColNumGt(1))
                         {
                            UltraEdit.activeDocument.insertLine();
                            // Delete the whitespace which is in the inserted line
                            // if the caret is not at column 1 after the insert.
                            if (UltraEdit.activeDocument.isColNumGt(1))
                            {
                               UltraEdit.activeDocument.deleteToStartOfLine();
                            }
                         }
                      
                         // Go to top of file and delete all trailing spaces/tabs.
                         UltraEdit.activeDocument.top();
                         UltraEdit.activeDocument.trimTrailingSpaces();
                      
                         // Convert line endings to DOS if file is a UNIX or MAC file.
                         if (UltraEdit.activeDocument.lineTerminator != 0)
                         {
                            UltraEdit.activeDocument.unixMacToDos();
                         }
                      
                         UltraEdit.activeDocument.findReplace.regExp=true;
                         UltraEdit.activeDocument.findReplace.preserveCase=false;
                         UltraEdit.activeDocument.findReplace.replaceAll=true;
                         UltraEdit.activeDocument.findReplace.replaceInAllOpen=false;
                      
                         // Remove all lines containing only a comment starting with #. This
                         // must be done before removing the comments at end of a line as
                         // otherwise additional blank lines would be created in the file.
                         UltraEdit.activeDocument.findReplace.replace("%[ ^t]++#*^p","");
                      
                         // Remove the comments at end of a line together with the spaces/tabs
                         // left to #, for example " #[tracking.cookie]" after a host name.
                         UltraEdit.activeDocument.findReplace.replace("[ ^t]++#*$","");
                      
                         // Remove all lines consisting only of 0.0.0.0 or 127.0.0.1.
                         UltraEdit.activeDocument.findReplace.replace("%^{0.0.0.0^}^{127.0.0.1^}^p","");
                      
                         // Cut off 0.0.0.0 or 127.0.0.1 with the following spaces/tabs from
                         // beginning of all other lines to keep just the rest of the line.
                         UltraEdit.activeDocument.findReplace.replace("%^{0.0.0.0^}^{127.0.0.1^}[ ^t]+","");
                      
                         // Convert everything in file to lower case.
                         UltraEdit.activeDocument.selectAll();
                         UltraEdit.activeDocument.toLower();
                      
                         // Find again the blank line after perhaps deleting lines.
                         UltraEdit.activeDocument.findReplace.regExp=false;
                         UltraEdit.activeDocument.top();
                         if (!UltraEdit.activeDocument.findReplace.find("^p^p"))
                         {
                            reportAbort("ERROR: No blank line found between the two blocks after file preparation.");
                            return;
                         }
                      
                         // Get the line number of the line below the blank line.
                         UltraEdit.activeDocument.key("HOME");
                         var nLineNumberStart = UltraEdit.activeDocument.currentLineNum;
                      
                         // Sort the lines below blank line with removing duplicates.
                         UltraEdit.activeDocument.gotoLine(nLineNumberStart,1);
                         UltraEdit.activeDocument.selectToBottom();
                         // Is there nothing selected below first blank line?
                         if (!UltraEdit.activeDocument.isSel())
                         {
                            reportAbort("ERROR: No block found to sort below blank line.");
                            return;
                         }
                         UltraEdit.activeDocument.sortAsc(0,false,true,1,-1);
                      
                         // Sort the lines above blank line with removing duplicates.
                         UltraEdit.activeDocument.gotoLine(nLineNumberStart-1,1);
                         UltraEdit.activeDocument.selectToTop();
                         UltraEdit.activeDocument.sortAsc(0,false,true,1,-1);
                      
                         // Removing duplicates from the block above the blank line moves the
                         // blank line up in the file. Therefore the line number of the line
                         // below the blank line must be determined once again after sorting.
                         UltraEdit.activeDocument.top();
                         if (!UltraEdit.activeDocument.findReplace.find("^p^p"))
                         {
                            reportAbort("ERROR: No blank line found between the two blocks after file preparation.");
                            return;
                         }
                         UltraEdit.activeDocument.key("HOME");
                         nLineNumberStart = UltraEdit.activeDocument.currentLineNum;
                      
                         // Save the file.
                         if (!UltraEdit.activeDocument.isName("")) UltraEdit.save();
                      
                      // --- File preparation end ------------------------------------------------
                      
                         // The file has definitely now DOS line endings.
                         sLineTerm = "\r\n";
                      
                         // Select from top of file to beginning of the blank line
                         // and load the lines of this block into an array of strings.
                         UltraEdit.activeDocument.top();
                         UltraEdit.activeDocument.gotoLineSelect(nLineNumberStart-1,1);
                      
                         var asLinesToFind = UltraEdit.activeDocument.selection.split(sLineTerm);
                         // Remove the last empty string.
                         asLinesToFind.pop();
                      
                         // Cancel the existing selection and set caret to beginning of the
                         // block after first blank line and select everything to end of file.
                         UltraEdit.activeDocument.gotoLine(nLineNumberStart,1);
                         UltraEdit.activeDocument.selectToBottom();
                      
                         // Is there nothing selected?
                         if (!UltraEdit.activeDocument.isSel())
                         {
                            reportAbort("ERROR: No block found to look up for the lines above.");
                            return;
                         }
                      
                         // Load all those lines into memory into another array of strings.
                         var asLinesToLookup = UltraEdit.activeDocument.selection.split(sLineTerm);
                      
                         // Remove the last string in array because of the last
                         // line in file has definitely also a line termination.
                         asLinesToLookup.pop();
                      
                         // Cancel the selection and move caret back to top of file.
                         UltraEdit.activeDocument.top();
                      
                         var nFirstLine = 0;
                         var nArrayIndex = 0;
                         var nCurrentCharCode;
                         var anCodeLineNumbers = [];
                         var nLastCharCode = asLinesToLookup[0].charCodeAt(0);
                         var sFileName = UltraEdit.activeDocument.path + "(";
                      
                         // Create an array for each character code of first character of the
                         // lines to look up with: character code, first line number having this
                         // character as first character and line number of the first line NOT
                         // having this character as first character. The sort order is not
                         // verified here because of both blocks were sorted before.
                         for (var nLine = 1; nLine < asLinesToLookup.length; nLine++)
                         {
                            nCurrentCharCode = asLinesToLookup[nLine].charCodeAt(0);
                            if (nCurrentCharCode > nLastCharCode)
                            {
                               anCodeLineNumbers[nArrayIndex] = [nLastCharCode, nFirstLine, nLine];
                               nFirstLine = nLine;
                               nLastCharCode = nCurrentCharCode;
                               nArrayIndex++;
                            }
                         }
                         // Add the last group which ends at end of the lines to look up.
                         anCodeLineNumbers[nArrayIndex] = [nLastCharCode, nFirstLine, nLine];
                      
                         nArrayIndex = 0;
                         var nDuplicates = 0;
                      
                         // Run this loop for each line to find in the appropriate block
                         // of remaining lines in file starting with same first character.
                         for (var nLine = 0; nLine < asLinesToFind.length; nLine++)
                         {
                            // Get code value of first character of line to find.
                            nCurrentCharCode = asLinesToFind[nLine].charCodeAt(0);
                      
                            while (nArrayIndex < anCodeLineNumbers.length)
                            {
                               // Has the next block in lines to look up as first character
                               // a character with a code value lower than first character
                               // of the current line to find?
                               if (anCodeLineNumbers[nArrayIndex][0] < nCurrentCharCode)
                               {
                                  nArrayIndex++; // Skip all look up lines with a first character
                                  continue;      // no line to find has as first character.
                               }
                               // Has the next block in lines to look up as first character
                               // a character with a code value greater than first character
                               // of the current line to find?
                               if (anCodeLineNumbers[nArrayIndex][0] > nCurrentCharCode)
                               {
                                  break;         // Skip all lines to find with a first character
                               }                 // no line to look up has as first character.
                      
                               // There is at least one line in the lines to look up with first
                               // character being equal the first character of the line to find.
                               var nLookupLine = anCodeLineNumbers[nArrayIndex][1];
                               var nBreakLine = anCodeLineNumbers[nArrayIndex][2];
                               var sLineToFind = asLinesToFind[nLine];
                      
                               do // Compare case-sensitive the line to find in block of lines
                               {  // to look up starting with same character as the line to find.
                                  if (sLineToFind == asLinesToLookup[nLookupLine])
                                  {
                                     var nLineToFind = nLine + 1;
                                     var nLineFound = nLookupLine + nLineNumberStart;
                                     // Insert an empty line before a new series of duplicate
                                     // lines except on first series of duplicate lines.
                                     if (nDuplicates) UltraEdit.outputWindow.write("");
                                     // Output the searched line and the found line with line numbers.
                                     UltraEdit.outputWindow.write(sFileName + nLineToFind + "): " + sLineToFind);
                                     UltraEdit.outputWindow.write(sFileName + nLineFound + "): " + asLinesToLookup[nLookupLine]);
                                     nDuplicates++;
                                     break;
                                  }
                               }
                               while((++nLookupLine) < nBreakLine);
                               // The group index is not incremented as the next line
                               // to find can start with the same first character.
                               break;
                            }
                         }
                      
                         // Output a summary information at bottom of output window.
                         var sPluralS = (nDuplicates != 1) ? "s." : ".";
                         if (nDuplicates) UltraEdit.outputWindow.write("");
                         UltraEdit.outputWindow.write("Found " + nDuplicates + " duplicate" + sPluralS);
                         UltraEdit.outputWindow.showWindow(true);
                         UltraEdit.outputWindow.showStatus=false;
                      }
                      
                      // Script entry point: do nothing unless at least one file is opened.
                      if (UltraEdit.document.length > 0)
                      {
                         // Set up the editing environment expected by PrepareAndLookup():
                         // insert mode on and column mode off.
                         UltraEdit.insertMode();
                         // columnModeOff is looked up first on the application object and
                         // otherwise on the active document; it is called where it exists.
                         if (typeof(UltraEdit.columnModeOff) != "function")
                         {
                            if (typeof(UltraEdit.activeDocument.columnModeOff) == "function")
                            {
                               UltraEdit.activeDocument.columnModeOff();
                            }
                         }
                         else
                         {
                            UltraEdit.columnModeOff();
                         }
                         // Start the preparation with the caret at top of the active file.
                         UltraEdit.activeDocument.top();
                         PrepareAndLookup();
                      }
                      
                      I created my own example file for testing. So I can only hope that the script prepares the active file as you expect. Otherwise, read the comments in the section preparing the file and adapt the code in this section.

                      The script is still not 100% fail-safe. For example, a file with two blank lines at the top can produce a wrong result.
                      Best regards from an UC/UE/UES for Windows user from Austria

                      221
                      Basic UserBasic User
                      221

                        Oct 29, 2018#11

                        This part is not what I really wanted:

                        Code: Select all

                          // Remove all lines with 0.0.0.0 or 127.0.0.1.
                           UltraEdit.activeDocument.findReplace.replace("%^{0.0.0.0^}^{127.0.0.1^}^p","");
                        I don't want to remove those lines, but cut the front part.

                        And comments (starting with #) at the end of a line weren't deleted, e.g. in

                        Code: Select all

                        0.0.0.0 00fun.com #[tracking.cookie]

                          Oct 30, 2018#12

                          Okay, I fixed those two and I did some optimization too:

                          Code: Select all

                          // Prepares the active file and reports the lines of the block above the
                          // first blank line which exist also in the block below that blank line.
                          //
                          // The active file is modified by this function: line endings are converted
                          // to DOS, trailing spaces/tabs and comments starting with # are removed,
                          // the leading 127.0.0.1 / 0.0.0.0 address is cut from the lines, all text
                          // is converted to lower case and both blocks are sorted with removing
                          // duplicate lines. The found duplicates are written with file name and
                          // line numbers into the output window followed by a summary line.
                          //
                          // The function has no parameters and returns nothing. On an error an
                          // error message is written to the output window and the function returns
                          // without doing the look up.
                          function PrepareAndLookup()
                          {
                             // Output an error message followed by the abort information
                             // into the output window and make the output window visible.
                             function AbortWithError(sMessage)
                             {
                                UltraEdit.outputWindow.write(sMessage);
                                UltraEdit.outputWindow.write("");
                                UltraEdit.outputWindow.write("Script execution aborted.");
                                UltraEdit.outputWindow.showWindow(true);
                                UltraEdit.outputWindow.showStatus=false;
                             }

                             // Convert line endings to DOS if file is a UNIX or MAC file.
                             if (UltraEdit.activeDocument.lineTerminator != 0)
                             {
                                UltraEdit.activeDocument.unixMacToDos();
                             }

                             // Search string for a blank line depending on line terminator type.
                             // NOTE(review): After the conversion above the file is expected
                             // to be a DOS file, so the two other branches are only a fallback.
                             var sLineTerm;
                             if (UltraEdit.activeDocument.lineTerminator < 1) sLineTerm = "^p^p";
                             else if (UltraEdit.activeDocument.lineTerminator == 2) sLineTerm = "^n^n";
                             else sLineTerm = "^r^r";

                             // Define the non regular expression Find parameters to find
                             // the blank line between the two blocks in the active file.
                             UltraEdit.ueReOn();
                             UltraEdit.activeDocument.findReplace.mode=0;
                             UltraEdit.activeDocument.findReplace.matchCase=true;
                             UltraEdit.activeDocument.findReplace.matchWord=false;
                             UltraEdit.activeDocument.findReplace.regExp=false;
                             UltraEdit.activeDocument.findReplace.searchDown=true;
                             UltraEdit.activeDocument.findReplace.searchInColumn=false;

                             // Prepare the active output window for the find results
                             // or the error messages in case of an error is detected.
                             UltraEdit.outputWindow.clear();
                             UltraEdit.outputWindow.showWindow(false);

                             // Find first blank line in active file.
                             if (!UltraEdit.activeDocument.findReplace.find(sLineTerm))
                             {
                                AbortWithError("ERROR: No block found at top of file which is terminated with an empty line.");
                                return;
                             }

                          // --- File preparation start ----------------------------------------------

                             // Make sure the last line of the file has a line termination.
                             UltraEdit.activeDocument.bottom();
                             if (UltraEdit.activeDocument.isColNumGt(1))
                             {
                                UltraEdit.activeDocument.insertLine();
                                // Remove the whitespace inserted by auto-indent if there is any.
                                if (UltraEdit.activeDocument.isColNumGt(1))
                                {
                                   UltraEdit.activeDocument.deleteToStartOfLine();
                                }
                             }

                             // Go to top of file and delete all trailing spaces/tabs.
                             UltraEdit.activeDocument.top();
                             UltraEdit.activeDocument.trimTrailingSpaces();

                             // Remove all lines starting or ending with # and
                             // remove also all comments at end of the other lines.
                             UltraEdit.activeDocument.findReplace.regExp=true;
                             UltraEdit.activeDocument.findReplace.preserveCase=false;
                             UltraEdit.activeDocument.findReplace.replaceAll=true;
                             UltraEdit.activeDocument.findReplace.replaceInAllOpen=false;
                             UltraEdit.activeDocument.findReplace.replace("%#*^p","");
                             UltraEdit.activeDocument.findReplace.replace("#*^p","^p");
                             UltraEdit.activeDocument.findReplace.replace("%*#^p","");

                             // Cut the leading 127.0.0.1 or 0.0.0.0 at beginning of the lines.
                             // The lines themselves are kept as the line endings are not matched.
                             UltraEdit.activeDocument.findReplace.replace("%^127.0.0.1*[ ^t^b]","");
                             UltraEdit.activeDocument.findReplace.replace("%^0.0.0.0*[ ^t^b]","");

                             // Convert everything in file to lower case.
                             UltraEdit.activeDocument.selectAll();
                             UltraEdit.activeDocument.toLower();

                             // Find again the blank line after perhaps deleting lines.
                             UltraEdit.activeDocument.findReplace.regExp=false;
                             UltraEdit.activeDocument.top();
                             if (!UltraEdit.activeDocument.findReplace.find("^p^p"))
                             {
                                AbortWithError("ERROR: Blank line between the two blocks not found after file preparation.");
                                return;
                             }

                             // Get the line number of the line below the blank line.
                             UltraEdit.activeDocument.key("HOME");
                             var nLineNumberStart = UltraEdit.activeDocument.currentLineNum;

                             // Sort the lines below blank line with removing duplicates.
                             UltraEdit.activeDocument.gotoLine(nLineNumberStart,1);
                             UltraEdit.activeDocument.selectToBottom();
                             // Is there nothing selected below first blank line?
                             if (!UltraEdit.activeDocument.isSel())
                             {
                                AbortWithError("ERROR: No block found to sort below blank line.");
                                return;
                             }
                             UltraEdit.activeDocument.trimTrailingSpaces();
                             UltraEdit.activeDocument.sortAsc(0,false,true,1,-1);

                             // Sort the lines above blank line with removing duplicates.
                             UltraEdit.activeDocument.gotoLine(nLineNumberStart-1,1);
                             UltraEdit.activeDocument.selectToTop();
                             UltraEdit.activeDocument.sortAsc(0,false,true,1,-1);

                             // Removing duplicates on sorting the block above the blank line
                             // reduces the number of lines above the blank line. So the blank
                             // line must be searched once again to get the right line number
                             // of the first line below the blank line for the selections done
                             // next and for the line numbers written to the output window.
                             UltraEdit.activeDocument.top();
                             if (!UltraEdit.activeDocument.findReplace.find("^p^p"))
                             {
                                AbortWithError("ERROR: Blank line between the two blocks not found after sorting.");
                                return;
                             }
                             UltraEdit.activeDocument.key("HOME");
                             nLineNumberStart = UltraEdit.activeDocument.currentLineNum;

                             // Save the file.
                          //   if (!UltraEdit.activeDocument.isName("")) UltraEdit.save();

                          // --- File preparation end ------------------------------------------------

                             // The file has definitely now DOS line endings.
                             sLineTerm = "\r\n";

                             // Select everything from top of file to beginning of the blank
                             // line and load those lines into memory as array of strings.
                             UltraEdit.activeDocument.top();
                             UltraEdit.activeDocument.gotoLineSelect(nLineNumberStart-1,1);

                             var asLinesToFind = UltraEdit.activeDocument.selection.split(sLineTerm);
                             // Remove the last empty string.
                             asLinesToFind.pop();

                             // Cancel the existing selection and set caret to beginning of the
                             // block after first blank line and select everything to end of file.
                             UltraEdit.activeDocument.gotoLine(nLineNumberStart,1);
                             UltraEdit.activeDocument.selectToBottom();

                             // Is there nothing selected?
                             if (!UltraEdit.activeDocument.isSel())
                             {
                                AbortWithError("ERROR: No block found to look up for the lines above.");
                                return;
                             }

                             // Load all those lines into memory into another array of strings.
                             var asLinesToLookup = UltraEdit.activeDocument.selection.split(sLineTerm);

                             // Remove the last string in array because of the last
                             // line in file has definitely also a line termination.
                             asLinesToLookup.pop();

                             // Cancel the selection and move caret back to top of file.
                             UltraEdit.activeDocument.top();

                             var nLine;
                             var nFirstLine = 0;
                             var nArrayIndex = 0;
                             var nCurrentCharCode;
                             var anCodeLineNumbers = [];
                             var nLastCharCode = asLinesToLookup[0].charCodeAt(0);
                             var sFileName = UltraEdit.activeDocument.path + "(";

                             // Create an array for each character code of first character of the
                             // lines to look up with: character code, index of first line having
                             // this character as first character and index of the first line NOT
                             // having this character as first character. A new entry is started
                             // only on a greater character code as the lines are sorted ascending.
                             for (nLine = 1; nLine < asLinesToLookup.length; nLine++)
                             {
                                nCurrentCharCode = asLinesToLookup[nLine].charCodeAt(0);
                                if (nCurrentCharCode > nLastCharCode)
                                {
                                   anCodeLineNumbers[nArrayIndex] = [nLastCharCode, nFirstLine, nLine];
                                   nFirstLine = nLine;
                                   nLastCharCode = nCurrentCharCode;
                                   nArrayIndex++;
                                }
                             }
                             // Add the entry for the last block of lines to look up.
                             anCodeLineNumbers[nArrayIndex] = [nLastCharCode, nFirstLine, nLine];

                             nArrayIndex = 0;
                             var nDuplicates = 0;

                             UltraEdit.outputWindow.clear();

                             // Run this loop for each line to find in the appropriate block
                             // of remaining lines in file starting with same first character.
                             for (nLine = 0; nLine < asLinesToFind.length; nLine++)
                             {
                                // Get code value of first character of line to find.
                                nCurrentCharCode = asLinesToFind[nLine].charCodeAt(0);
                                while (nArrayIndex < anCodeLineNumbers.length)
                                {
                                   // Has the next block in lines to look up as first character
                                   // a character with a code value lower than first character
                                   // of the current line to find?
                                   if (anCodeLineNumbers[nArrayIndex][0] < nCurrentCharCode)
                                   {
                                      nArrayIndex++; // Skip all look up lines with a first character
                                      continue;      // no line to find has as first character.
                                   }
                                   // Has the next block in lines to look up as first character
                                   // a character with a code value greater than first character
                                   // of the current line to find?
                                   if (anCodeLineNumbers[nArrayIndex][0] > nCurrentCharCode)
                                   {
                                      break;         // Skip all lines to find with a first character
                                   }                 // no line to look up has as first character.

                                   // There is at least one line in the lines to look up with first
                                   // character being equal the first character of the line to find.
                                   var nLookupLine = anCodeLineNumbers[nArrayIndex][1];
                                   var nBreakLine = anCodeLineNumbers[nArrayIndex][2];
                                   var sLineToFind = asLinesToFind[nLine];

                                   do // Compare case-sensitive the line to find in block of lines
                                   {  // to look up starting with same character as the line to find.
                                      if (sLineToFind == asLinesToLookup[nLookupLine])
                                      {
                                         // Convert the array indexes to line numbers in the file.
                                         var nLineToFind = nLine + 1;
                                         var nLineFound = nLookupLine + nLineNumberStart;
                                         // Insert an empty line before a new series of duplicate
                                         // lines except on first series of duplicate lines.
                                         if (nDuplicates) UltraEdit.outputWindow.write("");
                                         // Output the searched line and the found line with line numbers.
                                         UltraEdit.outputWindow.write(sFileName + nLineToFind + "): " + sLineToFind);
                                         UltraEdit.outputWindow.write(sFileName + nLineFound + "): " + asLinesToLookup[nLookupLine]);
                                         nDuplicates++;
                                         break;
                                      }
                                   }
                                   while((++nLookupLine) < nBreakLine);
                                   break;
                                }
                             }

                             // Output a summary information at bottom of output window.
                             var sPluralS = (nDuplicates != 1) ? "s." : ".";
                             if (nDuplicates) UltraEdit.outputWindow.write("");
                             UltraEdit.outputWindow.write("Found " + nDuplicates + " duplicate" + sPluralS);
                             UltraEdit.outputWindow.showWindow(true);
                             UltraEdit.outputWindow.showStatus=false;

                             // Cancel the selection in the output window only if this function
                             // exists, as calling a missing function would abort the script
                             // before the selection in the active file is cancelled.
                             if (typeof(UltraEdit.outputWindow.cancelSelect) == "function") UltraEdit.outputWindow.cancelSelect();
                             UltraEdit.activeDocument.cancelSelect();
                          }
                          
                          // Script entry point: run only if at least one file is opened.
                          if (UltraEdit.document.length > 0)
                          {
                             // Establish the editing environment: insert mode on and column
                             // mode off, using whichever columnModeOff() function is available.
                             UltraEdit.insertMode();
                             if (typeof(UltraEdit.columnModeOff) == "function")
                             {
                                UltraEdit.columnModeOff();
                             }
                             else if (typeof(UltraEdit.activeDocument.columnModeOff) == "function")
                             {
                                UltraEdit.activeDocument.columnModeOff();
                             }

                             // Start at top of the active file, then prepare it and look up duplicates.
                             UltraEdit.activeDocument.top();
                             PrepareAndLookup();
                          }