Script for converting text files to ANSI encoded files with DOS/Windows line endings

Script for converting text files to ANSI encoded files with DOS/Windows line endings

6,680583
Grand MasterGrand Master
6,680583

    16:54 - May 19#1

    The characters in text files can be encoded in various ways. Common are the character encodings:
    1. ANSI which is a synonym for one byte per character encoded text using a code page.
    2. UTF-8 without or with a byte order mark.
    3. UTF-16 Little Endian without or with a byte order mark.
    4. UTF-16 Big Endian without or with a byte order mark.
    The line ending type (also called line termination type) can also vary. Common newline types are:
    1. DOS/Windows with carriage return + line-feed.
    2. Unix/Linux with just line-feed.
    3. Mac (prior to Mac OS X) with just carriage return.
    UltraEdit and UEStudio have built-in functions to convert a text file from any character encoding to any other character encoding and from any line ending type to any other line ending type.

    That can also be done by using Save as with the Encoding option and the line ending option at the Save button in the Save as dialog window.

    The character encoding and the line ending type of the active file are shown by UltraEdit and UEStudio in the status bar at the bottom of the main application window.

    The script below makes it simple to convert UTF-8 and UTF-16 encoded files to ANSI, using the code page defined by the configuration setting Default code page (for ANSI encoding), and to convert Unix or Mac line endings to DOS/Windows line endings.

    Code: Select all

    // Converts the active file to DOS/Windows line endings and, if it is
    // UTF-8 or UTF-16 encoded, to ASCII/ANSI. Every step is reported in the
    // output window and a file modified by the script is saved at the end.
    if (UltraEdit.document.length > 0)  // Do nothing without an opened file.
    {
       // Encoding names written into an XML encoding declaration or an
       // HTML/XHTML charset declaration after a conversion from Unicode.
       var sXmlEncoding = "CP-1252";
       var sHtmlEncoding = "Windows-1252";

       var sActiveFile = UltraEdit.activeDocument.path;

       if (UltraEdit.activeDocument.isHexModeOn())
       {
          // Nothing is converted for a file opened in hex edit mode.
          UltraEdit.outputWindow.write(sActiveFile + " is opened in hex edit mode.");
       }
       else
       {
          var bLineEndsConverted = false;
          var bEncodingConverted = false;

          // A falsy line terminator value is treated as DOS/Windows;
          // every other value triggers the conversion to DOS/Windows.
          if (!UltraEdit.activeDocument.lineTerminator)
          {
             UltraEdit.outputWindow.write(sActiveFile + " is a DOS/Windows text file.");
          }
          else
          {
             UltraEdit.activeDocument.unixMacToDos();
             UltraEdit.outputWindow.write(sActiveFile + " converted to DOS/Windows.");
             bLineEndsConverted = true;
          }

          // Convert according to the encoding value of the active file:
          // 1200/1201 are handled as UTF-16 LE/BE and 65001 as UTF-8.
          var nEncoding = UltraEdit.activeDocument.encoding;
          if (nEncoding === 1200 || nEncoding === 1201)
          {
             UltraEdit.activeDocument.unicodeToASCII();
             UltraEdit.outputWindow.write(sActiveFile + " converted from UTF-16 to ASCII/ANSI.");
             bEncodingConverted = true;
          }
          else if (nEncoding === 65001)
          {
             UltraEdit.activeDocument.UTF8ToASCII();
             UltraEdit.outputWindow.write(sActiveFile + " converted from UTF-8 to ASCII/ANSI.");
             bEncodingConverted = true;
          }
          else
          {
             UltraEdit.outputWindow.write(sActiveFile + " is not a Unicode encoded file with the code page " + UltraEdit.activeDocument.codePage + ".");
          }

          // Only after an encoding conversion: adapt an encoding declaration
          // inside the file which still names the former Unicode encoding.
          if (bEncodingConverted)
          {
             // Set up a single, case-sensitive Perl regular expression
             // replace running downwards from the top of the active file.
             UltraEdit.perlReOn();
             UltraEdit.activeDocument.top();
             UltraEdit.activeDocument.findReplace.mode = 0;
             UltraEdit.activeDocument.findReplace.matchCase = true;
             UltraEdit.activeDocument.findReplace.matchWord = false;
             UltraEdit.activeDocument.findReplace.regExp = true;
             UltraEdit.activeDocument.findReplace.searchDown = true;
             UltraEdit.activeDocument.findReplace.searchInColumn = false;
             UltraEdit.activeDocument.findReplace.preserveCase = false;
             UltraEdit.activeDocument.findReplace.replaceAll = false;
             UltraEdit.activeDocument.findReplace.replaceInAllOpen = false;

             // XML declaration first. The search is case-sensitive because
             // "?xml" and "encoding" must be lowercase, so the encoding
             // value is matched with character classes for either case.
             UltraEdit.activeDocument.findReplace.replace("^([\\t ]*<\\?xml[^>]+?\\<encoding=[\"'])(?:[Uu][Tt][Ff]-(?:8|16)|[Ii][Ss][Oo]-10646-[Uu][Cc][Ss]-2)","\\1" + sXmlEncoding);
             if (!UltraEdit.activeDocument.isFound())
             {
                // No XML declaration replaced: try a case-insensitive
                // search for an HTML/XHTML meta charset declaration.
                UltraEdit.activeDocument.findReplace.matchCase = false;
                UltraEdit.activeDocument.findReplace.replace("(<meta[^>]+?\\<charset=[\"']?)(?:UTF-(?:8|16)|ISO-10646-UCS-2)","\\1" + sHtmlEncoding);
                if (UltraEdit.activeDocument.isFound())
                {
                   UltraEdit.outputWindow.write(sActiveFile + " changed HTML/XHTML charset to " + sHtmlEncoding + ".");
                }
             }
             else
             {
                UltraEdit.outputWindow.write(sActiveFile + " changed XML encoding to " + sXmlEncoding + ".");
             }
          }

          // Save the file if the script converted anything.
          if (bLineEndsConverted || bEncodingConverted)
          {
             // A file with a name or an extension is saved directly.
             if (!UltraEdit.activeDocument.isName("") || !UltraEdit.activeDocument.isExt(""))
             {
                UltraEdit.save();
             }
             else  // New, unnamed file: use Save As with an empty name.
             {
                UltraEdit.saveAs("");
             }
          }
       }

       // Make sure the messages written above can be seen.
       if (!UltraEdit.outputWindow.visible)
       {
          UltraEdit.outputWindow.showWindow(true);
       }
    }
    
    The output window shows information about the file on which this script was run as well as the applied conversion if any conversion was run at all.

    The character encoding declaration of an XML file is also adapted by the script. Please configure the string value of the variable sXmlEncoding in the script according to the default code page for ANSI encoded files.

    The character set declaration of an HTML / XHTML file is also adapted by the script. Please configure the string value of the variable sHtmlEncoding in the script according to the default code page for ANSI encoded files.
    Best regards from an UC/UE/UES for Windows user from Austria