UltraEdit script to convert XML file content to MySQL script

UltraEdit script to convert XML file content to MySQL script

3
NewbieNewbie
3

    Jan 21, 2020#1

    Hi,

    Is there any UltraEdit script that can convert XML files to MySQL scripts (or something close)?

    So XML content like

    Code: Select all

    <record>
    <document-id>patent.US336962</document-id>
    <patent-family>2406050</patent-family>
    <assignee source="IFI">
    <name>TESLA ELEC LIGHT &amp; MFG CO</name>
    </assignee>
    <assignee source="IFI">
    <name>THE TESLA ELECTRIC LIGHT AND MANUFACTURING COMPANY</name>
    </assignee>
    </record>
    
    <record>
    <document-id>patent.US337179</document-id>
    <patent-family>2406267</patent-family>
    <assignee source="IFI">
    <name>ALBEA SRVC SAS</name>
    <address>FR</address>
    </assignee>
    </record>
    
    converts to something like

    Code: Select all

    INSERT INTO `norm_export_xml`.`doc` (`document-id`, `patent-family`, `assignee1_source`, `assignee1_name`, `assignee2_source`, `assignee2_name`) VALUES ('patent.US336962', '2406050', 'IFI', 'TESLA ELEC LIGHT &amp; MFG CO', 'IFI', 'THE TESLA ELECTRIC LIGHT AND MANUFACTURING COMPANY');
    May have more than two assignee, may do something different for the table structure.

    Thanks for any info.

    6,602547
    Grand MasterGrand Master
    6,602547

      Jan 22, 2020#2

      I am quite sure that there is no general script to convert an XML file to an SQL file. Who defines the requirements for such a general conversion script?

      I wrote a commented UltraEdit script for the example posted by you. It is partly general and partly special coded for the assignee elements with its attribute and its child elements.

      Code: Select all

      // CDATA sections are not supported by this script.
      
      if (UltraEdit.document.length > 0)  // Is any file opened?
      {
         // Define the environment for this script.
         UltraEdit.insertMode();
         if (typeof(UltraEdit.columnModeOff) == "function") UltraEdit.columnModeOff();
         else if (typeof(UltraEdit.activeDocument.columnModeOff) == "function") UltraEdit.activeDocument.columnModeOff();
      
         // Get current line and column number of caret in active file.
         var nLine = UltraEdit.activeDocument.currentLineNum;
         var nColumn = UltraEdit.activeDocument.currentColumnNum;
      
         UltraEdit.activeDocument.selectAll();
      
         // Is the file not empty?
         if (UltraEdit.activeDocument.isSel())
         {
            // Get all record elements as an array of strings.
            var asRecords = UltraEdit.activeDocument.selection.match(/<record>[\s\S]+?<\/record>/g);
      
            // Cancel the selection and move caret back to initial position.
            UltraEdit.activeDocument.gotoLine(nLine,nColumn);
      
            // Does the active file contain any record element?
            if (asRecords)
            {
               var asSqlLines = [];
               // Process one record after the other until no more left.
               while (asRecords.length)
               {
                  // Remove the start record tag with the following
                  // whitespaces and the end record tag with the withspaces
                  // before and all empty elements with the withspaces.
                  asRecords[0] = asRecords[0].replace(/<record>\s*|\s*<\/record>|\s*<[^>]+?\/>\s*/g,"");
                  // Remove all whitespaces between two elements.
                  asRecords[0] = asRecords[0].replace(/>\s*</g,"><");
      
                  // Get all child elements of record element with their child elements.
                  var asElements = asRecords[0].match(/<([\w\-]+).*?>([\s\S]*?)<\/\1>/g);
      
                  // Has the record element child elements?
                  if (asElements)
                  {
                      var asNames = [];
                      var asValues = [];
                      var nAssigneeCount = 0;
      
                      // Process each child element of the record element.
                      for (var nElement = 0; nElement < asElements.length; nElement++)
                      {
                          // Get the name of the child element.
                          sName = asElements[nElement].replace(/^<([\w\-]+).*$/,"$1");
      
                          // Is the child element not the assignee element?
                          if (sName != "assignee")
                          {
                              // Append the name of this element to the array of element names.
                              asNames.push(sName);
                              // Append the value of this element to the array of element values.
                              asValues.push(asElements[nElement].replace(/^<.*?>([\s\S]*?)<\/.+>$/,"$1"));
                          }
                          else
                          {
                              // The assignee element must be handled special because of source attribute
                              // value must be get and names and values of its child elements and a count
                              // number must be added to the names of all these data?
                              nAssigneeCount++;
                              sName += nAssigneeCount.toString(10) + "_";
      
                              // Has this assignee element the attribute source?
                              if (asElements[nElement].search(/^<assignee\s+source=".*?"/) == 0)
                              {
                                  // Add the source attribute with name and value to both arrays.
                                  asNames.push(sName + "source");
                                  asValues.push(asElements[nElement].replace(/^<assignee\s+source="(.*?)".+$/,"$1"));
                              }
      
                              // Remove the assignee tags.
                              asElements[nElement] = asElements[nElement].replace(/<assignee.*?>|<\/assignee>/g,"");
      
                              // Get the child elements of the assignee element.
                              var asAssigneeChilds = asElements[nElement].match(/<([\w\-]+).*?>([\s\S]*?)<\/\1>/g);
      
                              // Are there any child elements?
                              if (asAssigneeChilds)
                              {
                                  // Process the child elements of assignee element with
                                  // the special concatenated name for each child element.
                                  for (var nAssigneeChild = 0; nAssigneeChild < asAssigneeChilds.length; nAssigneeChild++)
                                  {
                                      var sChildName = asAssigneeChilds[nAssigneeChild].replace(/^<([\w\-]+).*$/,"$1");
                                      asNames.push(sName + sChildName);
                                      asValues.push(asAssigneeChilds[nAssigneeChild].replace(/^<.*?>([\s\S]*?)<\/.*>$/,"$1"));
                                  }
                              }
                          }
                      }
      
                      // Are there element names and values?
                      if (asNames.length)
                      {
                          // Create the SQL instruction line and append it to the array of SQL lines.
                          var sSqlLine = "INSERT INTO `norm_export_xml`.`doc` (`" + asNames[0] + "`";
                          for (var nName = 1; nName < asNames.length; nName++)
                          {
                              sSqlLine += ", `" + asNames[nName] + "`";
                          }
                          sSqlLine += ") VALUES ('" + asValues[0] + "'";
                          for (var nValue = 1; nValue < asValues.length; nValue++)
                          {
                              sSqlLine += ", '" + asValues[nValue] + "'";
                          }
                          sSqlLine += ");";
                          asSqlLines.push(sSqlLine);
                      }
                  }
      
                  // Remove the just processed record element from the array.
                  asRecords.splice(0,1);
               }
      
               // Is there any SQL line to write into a new file?
               if (asSqlLines.length)
               {
                  // Add an empty string for line termination of last line.
                  asSqlLines.push("");
                  UltraEdit.newFile();
                  UltraEdit.activeDocument.unixMacToDos();
                  UltraEdit.activeDocument.write(asSqlLines.join("\r\n"));
               }
            }
         }
      }
      
      Best regards from an UC/UE/UES for Windows user from Austria

      3
      NewbieNewbie
      3

        Jan 22, 2020#3

        Hey Mofi.

        I run it on one of my XML files and it seems to run great. Thanks

        My whole idea for this is to do a quick 'find in file' search for lots of xml files (10000's)
        Sometime I may need to do a search for assignee address/name that match certain regular expression. 
        Currently I will do a 'find in files' in Ultraedit under that directory with thousands of xml files which takes a while.
        If I have to adjust the regex, it will have to research all over again.

        I am thinking if I can put all those xml data into a mysql datatbase, I can do a query instead which should speed things up. (since mysql have index to help)

        Now even with your scripts, I have to run that script through thousands of files and then run the sql scripts that may take a while. I will try that out and see which way is better.
        BTW, how can I run your scripts for all the xml files in a directory (including sub directory)

        Just an FYI
        I got the idea converting XML to mysql from the mysql import feature.
        https://dev.mysql.com/doc/refman/5.5/en/load-xml.html

        When I try to feed my xml to mysql import, it fails. That's why I tried to just covert the XML to SQL script using Ultraedit.
        Mysql import seems to be able to handle some XML, probably a more simple specific structure only.
        They also have a json import feature too, not sure it's any better...

        6,602547
        Grand MasterGrand Master
        6,602547

          Jan 22, 2020#4

          The script below can be used to process all *.xml files in a directory after
          1. adding code of function GetFileName and
          2. of function GetListOfFiles to the script and
          3. edit the two directory path strings assigned to the variables g_sDirectoryXML and g_sDirectorySQL at top of the script.
          Information about processed files is written to output window during script execution.

          Note 1: There must be perhaps configured the variables sSummaryInfo and sResultsDocTitle in function GetListOfFiles as described by the comments.

          Note 2: The directory for the SQL files created by the script must already exist before starting the script.

          Code: Select all

          // CDATA sections are not supported by this script.
          
          var g_sDirectoryXML = "C:\\Temp\\XML\\"
          var g_sDirectorySQL = "C:\\Temp\\SQL\\"
          
          // Insert here the functions:
          
          // GetFileName ...... http://forums.ultraedit.com/viewtopic.php?f=52&t=6762
          
          // GetListOfFiles ... http://forums.ultraedit.com/viewtopic.php?f=52&t=5442
          
          function XmlToSql (sFullXmlFileName)
          {
             UltraEdit.open(sFullXmlFileName);
             if (UltraEdit.activeDocument.path != sFullXmlFileName)
             {
                UltraEdit.outputWindow.write("Failed to open " + sFullXmlFileName);
                return;
             }
          
             UltraEdit.activeDocument.selectAll();
          
             // Is the file empty?
             if (!UltraEdit.activeDocument.isSel())
             {
                UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
                UltraEdit.outputWindow.write("Empty file " + sFullXmlFileName);
                return;
             }
          
             // Get all record elements as an array of strings.
             var asRecords = UltraEdit.activeDocument.selection.match(/<record>[\s\S]+?<\/record>/g);
             UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
          
             // Does the active file not contain any record element?
             if (!asRecords)
             {
                UltraEdit.outputWindow.write("No record in " + sFullXmlFileName);
                return;
             }
          
             var asSqlLines = [];
             // Process one record after the other until no more left.
             while (asRecords.length)
             {
                // Remove the start record tag with the following
                // whitespaces and the end record tag with the withspaces
                // before and all empty elements with the withspaces.
                asRecords[0] = asRecords[0].replace(/<record>\s*|\s*<\/record>|\s*<[^>]+?\/>\s*/g,"");
                // Remove all whitespaces between two elements.
                asRecords[0] = asRecords[0].replace(/>\s*</g,"><");
          
                // Get all child elements of record element with their child elements.
                var asElements = asRecords[0].match(/<([\w\-]+).*?>([\s\S]*?)<\/\1>/g);
          
                // Has the record element child elements?
                if (asElements)
                {
                    var asNames = [];
                    var asValues = [];
                    var nAssigneeCount = 0;
          
                    // Process each child element of the record element.
                    for (var nElement = 0; nElement < asElements.length; nElement++)
                    {
                        // Get the name of the child element.
                        sName = asElements[nElement].replace(/^<([\w\-]+).*$/,"$1");
          
                        // Is the child element not the assignee element?
                        if (sName != "assignee")
                        {
                            // Append the name of this element to the array of element names.
                            asNames.push(sName);
                            // Append the value of this element to the array of element values.
                            asValues.push(asElements[nElement].replace(/^<.*?>([\s\S]*?)<\/.+>$/,"$1"));
                        }
                        else
                        {
                            // The assignee element must be handled special because of source attribute
                            // value must be get and names and values of its child elements and a count
                            // number must be added to the names of all these data?
                            nAssigneeCount++;
                            sName += nAssigneeCount.toString(10) + "_";
          
                            // Has this assignee element the attribute source?
                            if (asElements[nElement].search(/^<assignee\s+source=".*?"/) == 0)
                            {
                                // Add the source attribute with name and value to both arrays.
                                asNames.push(sName + "source");
                                asValues.push(asElements[nElement].replace(/^<assignee\s+source="(.*?)".+$/,"$1"));
                            }
          
                            // Remove the assignee tags.
                            asElements[nElement] = asElements[nElement].replace(/<assignee.*?>|<\/assignee>/g,"");
          
                            // Get the child elements of the assignee element.
                            var asAssigneeChilds = asElements[nElement].match(/<([\w\-]+).*?>([\s\S]*?)<\/\1>/g);
          
                            // Are there any child elements?
                            if (asAssigneeChilds)
                            {
                                // Process the child elements of assignee element with
                                // the special concatenated name for each child element.
                                for (var nAssigneeChild = 0; nAssigneeChild < asAssigneeChilds.length; nAssigneeChild++)
                                {
                                    var sChildName = asAssigneeChilds[nAssigneeChild].replace(/^<([\w\-]+).*$/,"$1");
                                    asNames.push(sName + sChildName);
                                    asValues.push(asAssigneeChilds[nAssigneeChild].replace(/^<.*?>([\s\S]*?)<\/.*>$/,"$1"));
                                }
                            }
                        }
                    }
          
                    // Are there element names and values?
                    if (asNames.length)
                    {
                        // Create the SQL instruction line and append it to the array of SQL lines.
                        var sSqlLine = "INSERT INTO `norm_export_xml`.`doc` (`" + asNames[0] + "`";
                        for (var nName = 1; nName < asNames.length; nName++)
                        {
                            sSqlLine += ", `" + asNames[nName] + "`";
                        }
                        sSqlLine += ") VALUES ('" + asValues[0] + "'";
                        for (var nValue = 1; nValue < asValues.length; nValue++)
                        {
                            sSqlLine += ", '" + asValues[nValue] + "'";
                        }
                        sSqlLine += ");";
                        asSqlLines.push(sSqlLine);
                    }
                }
          
                // Remove the just processed record element from the array.
                asRecords.splice(0,1);
             }
          
             // Is there any SQL line to write into a new file?
             if (asSqlLines.length)
             {
                // Add an empty string for line termination of last line.
                asSqlLines.push("");
                UltraEdit.newFile();
                UltraEdit.activeDocument.unixMacToDos();
                UltraEdit.activeDocument.write(asSqlLines.join("\r\n"));
          
                var sFullSqlFileName = g_sDirectorySQL + GetFileName(sFullXmlFileName) + ".sql";
                UltraEdit.saveAs(sFullSqlFileName);
                if (UltraEdit.activeDocument.path == sFullSqlFileName)
                {
                   var nRecords = asSqlLines.length - 1;
                   UltraEdit.outputWindow.write("Saved " + nRecords + " record" + ((nRecords==1) ? "" : "s") +
                                                " into " + sFullSqlFileName);
                }
                else
                {
                   UltraEdit.outputWindow.write("Failed to save " + sFullSqlFileName);
                }
                UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
             }
             else
             {
                UltraEdit.outputWindow.write("No data in " + sFullXmlFileName);
             }
          }
          
          // Are the two required functions defined in this script?
          if ((typeof(GetFileName) == "function") && (typeof(GetListOfFiles) == "function"))
          {
             // Define the environment for this script.
             UltraEdit.insertMode();
             if (typeof(UltraEdit.columnModeOff) == "function") UltraEdit.columnModeOff();
             else if (typeof(UltraEdit.activeDocument.columnModeOff) == "function") UltraEdit.activeDocument.columnModeOff();
          
             // Make sure the path for the SQL files ends with a backslash.
             if (g_sDirectorySQL.substr(g_sDirectorySQL.length - 1) != "\\")
             {
                g_sDirectorySQL += "\\";
             }
          
             if (!UltraEdit.outputWindow.visible) UltraEdit.outputWindow.showWindow(true);
          
             if (GetListOfFiles(0,g_sDirectoryXML,"*.xml",true))
             {
                UltraEdit.activeDocument.selectAll();
                var asXmlFileNames = UltraEdit.activeDocument.selection.split("\r\n");
                UltraEdit.closeFile(UltraEdit.activeDocument.path,2);
                asXmlFileNames.pop();
          
                for (var nFile = 0; nFile < asXmlFileNames.length; nFile++)
                {
                   XmlToSql(asXmlFileNames[nFile]);
                }
             }
             else
             {
                UltraEdit.outputWindow.write("No *.xml file found in " + g_sDirectoryXML);
             }
          }
          else
          {
             if (typeof(GetFileName) != "function")
             {
                UltraEdit.messageBox("The function GetFileName is missing in script file.");
             }
             if (typeof(GetListOfFiles) != "function")
             {
                UltraEdit.messageBox("The function GetListOfFiles is missing in script file.");
             }
          }
          
          Best regards from an UC/UE/UES for Windows user from Austria

          3
          NewbieNewbie
          3

            Jan 22, 2020#5

            Thank you Mofi.

            I just did a test run with 3 large xml files and it convert them with no problem.

            Running script: C:\...\MySQLTest\XMLToSQLAllFiles.js
            ========================================================================================================
            Saved 22336 records into C:\Temp\XMLtoSQL\SQL\20191126-1141-...-P02-0048_feed.sql
            Saved 22825 records into C:\Temp\XMLtoSQL\SQL\20191126-1151-...-P01-0054_feed.sql
            Saved 22892 records into C:\Temp\XMLtoSQL\SQL\20191126-1154-...-P01-0050_feed.sql
            Script succeeded.

            Thanks for all the help. :)

            6,602547
            Grand MasterGrand Master
            6,602547

              Jan 23, 2020#6

              That's fine. I have some hints on running the script on thousands of XML files:
              1. Select Open file without temp file but NO Prompt at Advanced - Settings or Configuration - File handling - Temporary files to avoid that UltraEdit creates a temporary file for each XML file opened during script execution. Don't forget to set this setting back to Use temporary file for editing (normal operation) or the other setting if you prefer that after script execution finished.
              2. Uncheck Enable syntax coloring at Advanced - Settings or Configuration - Editor display - Syntax highlighting to speed up opening the XML files by preventing running syntax highlighting on each opened XML file. Don't forget to check this setting again after script execution finished.
              3. Uncheck Auto parse XML documents less than  X  MB at Advanced - Settings or Configuration - XML manager to avoid that UltraEdit parses each opened XML for XML Manager view. Restore the initial state after script execution finished.
              Best regards from an UC/UE/UES for Windows user from Austria