import java.io.*; import org.w3c.dom.*; import org.w3c.dom.html.*; import javax.xml.parsers.*; import javax.xml.transform.*; import javax.xml.transform.stream.*; import javax.xml.transform.dom.*; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** * This class repairs invalid HTML & javascript code produced by * IHMC Concept Map Tools, v2.9.1. * * This code is experimental and is recommended only for testing purposes. * * @author Miika Nurminen (minurmin@cc.jyu.fi) * @version 0.1 / 2003-06-16 */ public class Transform { // Global value so it can be ref'd by the tree-adapter Document document; /** * Creates a DOM representation of an XML document given in filename. * * @param filename input file */ public void fromFile(String filename) throws SAXException,ParserConfigurationException,IOException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); document = builder.parse( new File(filename) ); } /** * Perform actual transforming & element cleanup. * Cleanup is done using W3C DOM interfaces. * * Javascript code is moved to a separate file that is referenced from * HTML. Most formatting information is omitted or simplified. * Inline pictures (filenames starting with 'in_' are embedded directly to * HTML page, no links. */ public void doTransform() { NodeList nl; Element e; Node n; e = (Element)(document.getElementsByTagName("script").item(0)); // cleaning up SCRIPT element nl=e.getChildNodes(); while(nl.getLength()>0) { e.removeChild(nl.item(0)); } e.setAttribute("language","JavaScript"); e.setAttribute("src","cmpopup.js"); e.appendChild(this.document.createComment("content for Opera 7")); // removing bgcolor & onclick from body e = (Element)(document.getElementsByTagName("body").item(0)); e.removeAttribute("bgcolor"); e.removeAttribute("onclick"); // clearing presentation-specific (and partially invalid) *-attributes nl = document.getElementsByTagName("table"); for (int i=0; i0) { n = nl.item(nl.getLength()-1).getParentNode(); NodeList nl2 = nl.item(nl.getLength()-1).getChildNodes(); for (int i=0; i0) for (int i=nl.getLength()-1; i>=0; i--) { // reverse because we are removing one element e = (Element)(nl.item(i)); if (e.getAttribute("shape").equals("default")) { e.getParentNode().removeChild(e); } else { String s = e.getAttribute("onmouseover"); if (!s.equals("")) { int j = s.indexOf("\\'")+2; int k = s.indexOf("\\'",j); e.removeAttribute("onmouseover"); e.setAttribute("alt", s.substring(j,k)); } } } // clearing STYLE attributes nl = document.getElementsByTagName("div"); for (int i=0; i0) for (int i=nl.getLength()-1; i>=0; i--) { // reverse because we are removing some links e = (Element)(nl.item(i)); e.removeAttribute("onclick"); if ((e.getAttribute("href").indexOf("gif")>=0) || (e.getAttribute("href").indexOf("jpg")>=0)) { String desc = e.getChildNodes().item(0).getNodeValue(); if (desc.indexOf("in_")!=0) continue; // if description does start with in_, image is placed inline desc=desc.substring(3); Element e2 = document.createElement("img"); e2.setAttribute("src", e.getAttribute("href")); e2.setAttribute("alt", desc); e.getParentNode().insertBefore(e2, e); e.getParentNode().removeChild(e); } } // adding dummy ALT tags to get it validated & clearing invalid align attrs nl = document.getElementsByTagName("img"); for (int i=0; i