User to user discussion and support for UltraEdit, UEStudio, UltraCompare, and other IDM applications.

Help with writing and running scripts
5 posts Page 1 of 1
Code: Select all
article_5-1.xml
article_8-1.xml
article_10-1.xml
article_18-1.xml
article_24-1.xml
article_30-1.xml
article_36-1.xml
article_42-1.xml
article_5-1.xml
article_8-1.xml
article_10-1.xml
article_18-1.xml
article_24-1.xml
article_30-1.xml
article_36-1.xml
article_42-1.xml
...

I have a folder structure like above.
I want to find which articles contain fewer than 20 words inside <body.content>...</body.content>.

The exceptions are given below...

Code: Select all
<p><a name="5-1" class="replica-location"/></p>
<content><img src="img_5-1.jpg" credit="Getty Images"/>Somenath</content>

The tags above (and their content) must not be counted.

Attachments

Please try the script below.
I hope it helps you.

Code: Select all
   UltraEdit.outputWindow.clear();
   var sDirectory = UltraEdit.getString("Enter Path of Files = ",1);
   var sDirectory1 = sDirectory.replace(/\\/g,"\\");

   UltraEdit.perlReOn();
   UltraEdit.frInFiles.searchInFilesTypes="*.xml";
   UltraEdit.frInFiles.directoryStart=sDirectory1 + "\\";
   UltraEdit.frInFiles.openMatchingFiles=false;
   UltraEdit.frInFiles.ignoreHiddenSubs=true;
   UltraEdit.frInFiles.filesToSearch=0;
   UltraEdit.frInFiles.useEncoding=true;
   UltraEdit.frInFiles.encoding=65001;  // The files are UTF-8 encoded!
   UltraEdit.frInFiles.useOutputWindow=true;
   UltraEdit.frInFiles.matchCase=true;
   UltraEdit.frInFiles.matchWord=false;
   UltraEdit.frInFiles.preserveCase=false;
   UltraEdit.frInFiles.searchSubs=true;
   UltraEdit.frInFiles.replaceAll=true;
   UltraEdit.frInFiles.regExp=true;
   
   UltraEdit.frInFiles.find("<body.content>([\\S\\s]+?)</body.content>");
   UltraEdit.outputWindow.copy();
   UltraEdit.newFile();
   UltraEdit.activeDocument.paste();

   UltraEdit.insertMode();
   UltraEdit.columnModeOff();
   UltraEdit.activeDocument.hexOff();
   UltraEdit.perlReOn();
   UltraEdit.activeDocument.findReplace.mode=0;
   UltraEdit.activeDocument.findReplace.matchCase=true;
   UltraEdit.activeDocument.findReplace.matchWord=false;
   UltraEdit.activeDocument.findReplace.searchDown=true;
   if (typeof(UltraEdit.activeDocument.findReplace.searchInColumn) == "boolean") {
      UltraEdit.activeDocument.findReplace.searchInColumn=false;
   }
   // Move caret to top of the file.
   UltraEdit.activeDocument.top();

   // Determine once type of line termination.
   var sLineTerm = "\r\n";           // Default is DOS.
   if (typeof(UltraEdit.activeDocument.lineTerminator) == "number") {
      if (UltraEdit.activeDocument.lineTerminator == 2) sLineTerm = "\n";
      else if (UltraEdit.activeDocument.lineTerminator == 3) sLineTerm = "\r";
   }
   else {
      // This version of UE/UES does not offer line terminator property.
      UltraEdit.activeDocument.findReplace.regExp=false;
      if (!UltraEdit.activeDocument.findReplace.find(sLineTerm)) {
         sLineTerm = "\n";           // Not DOS, perhaps UNIX.
         if (!UltraEdit.activeDocument.findReplace.find(sLineTerm)) {
            sLineTerm = "\r";        // Also not UNIX, perhaps MAC.
            if (!UltraEdit.activeDocument.findReplace.find(sLineTerm)) {
               sLineTerm = "\r\n";   // No line terminator, use DOS.
            }
         }
      }
      UltraEdit.activeDocument.top();
   }

   UltraEdit.activeDocument.findReplace.regExp=true;
   UltraEdit.activeDocument.findReplace.replaceAll=true;
   UltraEdit.activeDocument.findReplace.replaceInAllOpen=false;
   UltraEdit.activeDocument.findReplace.replace('<p><a name="[^\r\n]*?</p>','');
   UltraEdit.activeDocument.findReplace.replace('<content>[^\r\n]*?</content>','');
   UltraEdit.activeDocument.findReplace.replace("^Find[^\r\n]*?$",'');
   UltraEdit.activeDocument.findReplace.replace('Found[^\r\n]*?$','');
   UltraEdit.activeDocument.findReplace.replace('found[^\r\n]*?$','');
   UltraEdit.activeDocument.findReplace.replace('Search complete, ','');
   UltraEdit.activeDocument.findReplace.replace('^.*\\\\([^<>]*?)\.xml.*: <body.content>','$1.xml\r\n<body.content>');
   UltraEdit.activeDocument.findReplace.replace('<body.content>','//body.content');
   UltraEdit.activeDocument.findReplace.replace('</body.content>','///body.content');
   UltraEdit.activeDocument.findReplace.replace('<[^<>]*?>','');
   UltraEdit.activeDocument.findReplace.replace('(\\r?\\n){2,}','$1');
   UltraEdit.activeDocument.findReplace.replaceAll=false;
   while (UltraEdit.activeDocument.findReplace.find("(?s)//body.content.*///body.content"))
   {
      var sBlock = UltraEdit.activeDocument.selection.replace(/\/\/body.content.*(?:\r\n|\n|\r)|\/\/\/body.content/g,"");
      if (sBlock == "") continue;  // Ignore empty blocks.

      // Verify if at least 1 line terminator is found in the remaining block.
      var nLineCount = sBlock.indexOf(sLineTerm);
      if (nLineCount < 0)
      {   // If no line terminator found, the block
         nLineCount = 1;           // contains just a part of a line counted
      }                            // nevertheless as 1 line.
      else
      {
         // Block contains 1 or more lines. Split the block up into
         // an array of strings each containing an entire line.
         var asLines = sBlock.split(sLineTerm);
         nLineCount = asLines.length;
         // If the block ends with a line termination (last string is
         // empty), decrease the number of lines by 1 to get correct result.
         if (asLines[nLineCount-1] == "") nLineCount--;
      }

      // Replace all sequences of whitspace characters (spaces,
      // tabs, line terminators, form-feeds) by a single space.
      // This expression defines which string is interpreted as "word".
      var sText = sBlock.replace(/\s+/g," ");

      // Split the string of words into an array of strings each containing
      // one word. The number of strings is equal the number of words.
      var asWords = sText.split(" ");
      var nWordCount = asWords.length;

      // But if text ends with a space character (last string empty),
      // the count must be decreased by one to get correct word count.
      if (asWords[nWordCount-1] == "") nWordCount--;
      // Also if the text starts with a space character (first string
      // empty), the count must be decreased by one for correct count.
      if (asWords[0] == "") nWordCount--;

      // Write the result into the file below the still selected block.
      // Selection is discarded with moving the caret to end of line
      // even if the caret is already at end of the marker line.
      WordCount = nWordCount.toString(10);

      if(WordCount < 20)
      {
         UltraEdit.activeDocument.write("Total words: " + nWordCount);
      }
   }
   UltraEdit.activeDocument.top();
   UltraEdit.activeDocument.findReplace.replaceAll=true;
   UltraEdit.activeDocument.findReplace.replace("----------------------------------------" + sLineTerm + ".+.xml" + sLineTerm + "//body.content[\\S\\s]+?///body.content" + sLineTerm,"");
   if (UltraEdit.activeDocument.isEof() == 1)
   {
      UltraEdit.activeDocument.write("NO File Present in this Directory.");
   }
   UltraEdit.saveAs(sDirectory1 + "\\Report.txt");
First of all, I would like to thank you. The script works quite nicely, but one change is still needed. I am uploading an article that contains fewer than 20 words, yet the script ignores that file. Kindly help me.

Somenath

Attachments

First of all, sorry:
I missed this case.
Code: Select all
   UltraEdit.outputWindow.clear();
   var sDirectory = UltraEdit.getString("Enter Path of Files = ",1);
   var sDirectory1 = sDirectory.replace(/\\/g,"\\");

   UltraEdit.perlReOn();
   UltraEdit.frInFiles.searchInFilesTypes="*.xml";
   UltraEdit.frInFiles.directoryStart=sDirectory1 + "\\";
   UltraEdit.frInFiles.openMatchingFiles=false;
   UltraEdit.frInFiles.ignoreHiddenSubs=true;
   UltraEdit.frInFiles.filesToSearch=0;
   UltraEdit.frInFiles.useEncoding=true;
   UltraEdit.frInFiles.encoding=65001;  // The files are UTF-8 encoded!
   UltraEdit.frInFiles.useOutputWindow=true;
   UltraEdit.frInFiles.matchCase=true;
   UltraEdit.frInFiles.matchWord=false;
   UltraEdit.frInFiles.preserveCase=false;
   UltraEdit.frInFiles.searchSubs=true;
   UltraEdit.frInFiles.replaceAll=true;
   UltraEdit.frInFiles.regExp=true;
   
   UltraEdit.frInFiles.find("<body.content>([\\S\\s]+?)</body.content>");
   UltraEdit.outputWindow.copy();
   UltraEdit.newFile();
   UltraEdit.activeDocument.paste();

   UltraEdit.insertMode();
   UltraEdit.columnModeOff();
   UltraEdit.activeDocument.hexOff();
   UltraEdit.perlReOn();
   UltraEdit.activeDocument.findReplace.mode=0;
   UltraEdit.activeDocument.findReplace.matchCase=true;
   UltraEdit.activeDocument.findReplace.matchWord=false;
   UltraEdit.activeDocument.findReplace.searchDown=true;
   if (typeof(UltraEdit.activeDocument.findReplace.searchInColumn) == "boolean") {
      UltraEdit.activeDocument.findReplace.searchInColumn=false;
   }
   // Move caret to top of the file.
   UltraEdit.activeDocument.top();

   // Determine once type of line termination.
   var sLineTerm = "\r\n";           // Default is DOS.
   if (typeof(UltraEdit.activeDocument.lineTerminator) == "number") {
      if (UltraEdit.activeDocument.lineTerminator == 2) sLineTerm = "\n";
      else if (UltraEdit.activeDocument.lineTerminator == 3) sLineTerm = "\r";
   }
   else {
      // This version of UE/UES does not offer line terminator property.
      UltraEdit.activeDocument.findReplace.regExp=false;
      if (!UltraEdit.activeDocument.findReplace.find(sLineTerm)) {
         sLineTerm = "\n";           // Not DOS, perhaps UNIX.
         if (!UltraEdit.activeDocument.findReplace.find(sLineTerm)) {
            sLineTerm = "\r";        // Also not UNIX, perhaps MAC.
            if (!UltraEdit.activeDocument.findReplace.find(sLineTerm)) {
               sLineTerm = "\r\n";   // No line terminator, use DOS.
            }
         }
      }
      UltraEdit.activeDocument.top();
   }

   UltraEdit.activeDocument.findReplace.regExp=true;
   UltraEdit.activeDocument.findReplace.replaceAll=true;
   UltraEdit.activeDocument.findReplace.replaceInAllOpen=false;
   UltraEdit.activeDocument.findReplace.replace('<p><a name="[^\r\n]*?</p>','');
   UltraEdit.activeDocument.findReplace.replace('<content>[^\r\n]*?</content>','');
   UltraEdit.activeDocument.findReplace.replace("^Find[^\r\n]*?$",'');
   UltraEdit.activeDocument.findReplace.replace('Found[^\r\n]*?$','');
   UltraEdit.activeDocument.findReplace.replace('found[^\r\n]*?$','');
   UltraEdit.activeDocument.findReplace.replace('Search complete, ','');
   UltraEdit.activeDocument.findReplace.replace('^.*\\\\([^<>]*?)\.xml.*: <body.content>','$1.xml\r\n<body.content>');
   UltraEdit.activeDocument.findReplace.replace('<body.content>','//body.content');
   UltraEdit.activeDocument.findReplace.replace('</body.content>','///body.content');
   UltraEdit.activeDocument.findReplace.replace('<[^<>]*?>','');
   UltraEdit.activeDocument.findReplace.replace('(\\r?\\n){2,}','$1');
   UltraEdit.activeDocument.findReplace.replaceAll=false;
   while (UltraEdit.activeDocument.findReplace.find("(?s)//body.content.*///body.content"))
   {
      var sBlock = UltraEdit.activeDocument.selection.replace(/\/\/body.content.*(?:\r\n|\n|\r)|\/\/\/body.content/g,"");
      
      // Verify if at least 1 line terminator is found in the remaining block.
      var nLineCount = sBlock.indexOf(sLineTerm);
      if (nLineCount < 0)
      {   // If no line terminator found, the block
         nLineCount = 1;           // contains just a part of a line counted
      }                            // nevertheless as 1 line.
      else
      {
         // Block contains 1 or more lines. Split the block up into
         // an array of strings each containing an entire line.
         var asLines = sBlock.split(sLineTerm);
         nLineCount = asLines.length;
         // If the block ends with a line termination (last string is
         // empty), decrease the number of lines by 1 to get correct result.
         if (asLines[nLineCount-1] == "") nLineCount--;
      }

      // Replace all sequences of whitspace characters (spaces,
      // tabs, line terminators, form-feeds) by a single space.
      // This expression defines which string is interpreted as "word".
      var sText = sBlock.replace(/\s+/g," ");

      // Split the string of words into an array of strings each containing
      // one word. The number of strings is equal the number of words.
      var asWords = sText.split(" ");
      var nWordCount = asWords.length;

      // But if text ends with a space character (last string empty),
      // the count must be decreased by one to get correct word count.
      if (asWords[nWordCount-1] == "") nWordCount--;
      
      // Write the result into the file below the still selected block.
      // Selection is discarded with moving the caret to end of line
      // even if the caret is already at end of the marker line.
      WordCount = nWordCount.toString(10);

      if(WordCount < 20)
      {
         UltraEdit.activeDocument.write("Total words: " + nWordCount);
      }
   }
   UltraEdit.activeDocument.top();
   UltraEdit.activeDocument.findReplace.replaceAll=true;
   UltraEdit.activeDocument.findReplace.replace("----------------------------------------" + sLineTerm + ".+.xml" + sLineTerm + "//body.content[\\S\\s]+?///body.content" + sLineTerm,"");
   if (UltraEdit.activeDocument.isEof() == 1)
   {
      UltraEdit.activeDocument.write("NO File Present in this Directory.");
   }
   UltraEdit.saveAs(sDirectory1 + "\\Report.txt");


I hope this script solves your problem.
Many many thanks to you.
It's working really nice.

Somenath
5 posts Page 1 of 1