XML Handling Simplified

From CodeCodex

<HIGHLIGHTSYNTAX class="java"> /**
 * Written By Bill Kress.  Feel free to use, distribute and enhance.
 */

package tv.kress.scripting;

import java.io.IOException; import java.util.WeakHashMap;

import javax.xml.parsers.*;

import org.w3c.dom.Node; import org.xml.sax.SAXException;

/**
/**
 * A thin wrapper that makes XML node objects easier to use than raw DOM.
 * It only contains a few helper methods so that simple requests such as
 * "give me the value of the attribute named Blah" do not take three calls
 * and an entire line of unreadable code.
 * <p>
 * Start with something like this:
 * <pre>
 *     BKNode root = BKNode.readDocument("C:/XMLFile.xml");
 * </pre>
 * Then use the methods of BKNode to access the fields and children of
 * "root".
 * <p>
 * The wrapper cache is a plain static map with no synchronization, so this
 * class is not safe for concurrent use from several threads.
 *
 * @author billk
 */
public class BKNode {
    // Constructors and bookkeeping

    /** Stores this object's node. There is exactly one Node for every BKNode. */
    private final Node node;

    /**
     * Cache from DOM node to its wrapper. Keys are held weakly, and the
     * wrappers are held through weak references because every BKNode
     * strongly references its own key; storing the wrapper directly would
     * keep each entry alive forever.
     */
    private static final WeakHashMap<Node, java.lang.ref.WeakReference<BKNode>> cache =
            new WeakHashMap<Node, java.lang.ref.WeakReference<BKNode>>();

    /**
     * Private constructor. Use {@link #makeBKNode(Node)} or
     * {@link #readDocument(String)} instead.
     *
     * @param node the node to be encapsulated; callers never pass null
     */
    private BKNode(Node node) {
        this.node = node;
    }

    /**
     * Factory backed by a cache: asking for the same node twice while the
     * first wrapper is still referenced returns that same wrapper, so if
     * node==node then bknode==bknode.
     *
     * @param node the DOM node to wrap; may be null
     * @return the BKNode wrapping {@code node}, or null when {@code node} is null
     */
    public static BKNode makeBKNode(Node node) {
        // Null arrives routinely from getFirstChild/getNextSibling at the end of a list.
        if (node == null) {
            return null;
        }

        // Check the cache first.
        java.lang.ref.WeakReference<BKNode> ref = cache.get(node);
        BKNode cached = (ref == null) ? null : ref.get();
        if (cached != null) {
            return cached;
        }

        BKNode created = new BKNode(node);
        cache.put(node, new java.lang.ref.WeakReference<BKNode>(created));
        return created;
    }

    /**
     * Hides the boilerplate usually needed to read in a document. The
     * returned BKNode wraps the parsed document node of a fully formed DOM
     * tree. Exceptions are left for the caller to deal with.
     * <p>
     * NOTE(review): the parser runs with default settings, so DTDs and
     * external entities are processed; harden the factory before using
     * this on untrusted input.
     *
     * @param xmlFileName the location of the XML to read, handed to
     *        {@code DocumentBuilder.parse(String)} as a URI
     * @return the document node wrapped in a BKNode
     * @throws SAXException if the document cannot be parsed
     * @throws IOException if the document cannot be read
     * @throws ParserConfigurationException if no parser can be created
     */
    public static BKNode readDocument(String xmlFileName)
            throws SAXException, IOException, ParserConfigurationException {
        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        // Merge CDATA sections into the adjacent text nodes.
        domFactory.setCoalescing(true);
        DocumentBuilder domBuilder = domFactory.newDocumentBuilder();
        Node top = domBuilder.parse(xmlFileName);
        // Go through the factory so the document node obeys the cache guarantee too.
        return makeBKNode(top);
    }

    /**
     * Shortcut for getting the attribute collection, looking an attribute
     * up by name and returning its value.
     *
     * @param attrib the name of the attribute (of the current node) to look for
     * @return the value of that attribute, or null if the node has no such
     *         attribute or cannot carry attributes at all
     */
    public String getAttrNamed(String attrib) {
        // Only element nodes have an attribute map; for other nodes DOM returns null.
        org.w3c.dom.NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return null;
        }
        Node found = attributes.getNamedItem(attrib);
        if (found == null) {
            return null;
        }
        return found.getNodeValue();
    }

    /**
     * Shortcut for getting a direct child (non-attribute) node with a given name.
     *
     * @param name the name of the child node being sought
     * @return the first matching child, wrapped, or null if there is none
     */
    public BKNode getChildNamed(String name) {
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeName().trim().equals(name)) {
                return makeBKNode(child);
            }
        }
        return null;
    }

    /**
     * Shortcut for grabbing the text of this node, i.e. the value of its
     * first child named "#text" (the unnamed text between a start tag and
     * an end tag).
     *
     * @return the text, or null if this node has no text child
     */
    public String getText() {
        BKNode textChild = getChildNamed("#text");
        if (textChild == null) {
            return null;
        }
        return textChild.getValue();
    }

    /**
     * Returns the held node. You are welcome to use this (it cannot hurt
     * BKNode in any way), but consider adding another helper method to this
     * class if you are going to make the same call more than once.
     *
     * @return the wrapped node
     */
    public Node getNode() {
        return node;
    }

    // Methods that do nothing but forward functionality from node.
    // If you need anything, add more.

    /** @return the wrapped node's name */
    public String getName() {
        return node.getNodeName();
    }

    /** @return the wrapped node's value */
    public String getValue() {
        return node.getNodeValue();
    }

    /** @return the wrapped first child, or null if there is none */
    public BKNode getFirstChild() {
        return makeBKNode(node.getFirstChild());
    }

    /** @return the wrapped next sibling, or null if there is none */
    public BKNode getNextSibling() {
        return makeBKNode(node.getNextSibling());
    }

    /** @return the wrapped parent node, or null if there is none */
    public BKNode getParentNode() {
        return makeBKNode(node.getParentNode());
    }

    /** @return the wrapped node's own string form */
    @Override
    public String toString() {
        return node.toString();
    }
}
// </HIGHLIGHTSYNTAX>



 // Example: list every link in an HTML fragment together with its attributes.
 // example HTML code: (could also come from an URL)

$html = '<html> <head> <title>links</title> </head> <body> <a href="link1.htm" title="Link title 1" target="_blank">Link #1</a>
<a href="link2.htm" title="Link title 2" target="_blank">Link #2</a>
<a href="link3.htm" title="Link title 3" target="_blank">Link #3</a>
</body> </html>';

// check if DomXML is available:
// DOMDocument is a class, so it has to be probed with class_exists();
// function_exists() never finds it and the script would always stop here.
if (!class_exists('DOMDocument')) {
    die('DomXML extension is not available :-(');
}

// NOTE(review): the printed wrapper markup was lost from the listing;
// <pre> ... </pre> is reconstructed from the dangling closing tag.
print '<pre>';
// create new DOM object:
$dom = new DomDocument();
// load HTML code:
$dom->loadHTML($html);
// get tags by tagname (all <a> tags / links):
$tags = $dom->getElementsByTagName('a');
// loop through all links:
foreach ($tags as $a) {
    print '<b>' . $a->nodeValue . '</b><br/>';
    // does this tag have attributes:
    if ($a->hasAttributes()) {
        // loop through all attributes:
        foreach ($a->attributes as $attribute) {
            print '- ' . $attribute->name . ': ' . $attribute->value;
            print "<br/>";
        }
    }
    print "<hr/>";
}
print '</pre>';
</pre>