Retrieve the text of a URL

From CodeCodex

Revision as of 18:42, 15 February 2011 by 79.91.219.32 (Talk)

Implementations

Java

This simple class downloads the HTML text of a URL:

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

/**
 * Represents a webpage: the complete text behind a URL, downloaded once at
 * construction time and kept in memory as an immutable string.
 *
 * @author Julius Schorzman
 * (c)2005 - provided as GPL
 */
public class Webpage {

	/** Charset used when the response declares none, or one this JVM cannot decode. */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/** Number of characters requested from the reader per call. */
	private static final int BUFFER_SIZE = 8192;

	/** The decoded text of the page. */
	private final String html;

	/**
	 * Downloads the text behind the given URL.
	 * <p>
	 * The bytes are decoded with the charset named in the response's
	 * Content-Type header; if none is given (or it is not supported by this
	 * JVM) UTF-8 is used, so the result does not depend on the platform
	 * default encoding. Any URL scheme the JVM has a handler for is accepted,
	 * not only HTTP. The connection's input stream is always closed before
	 * the constructor returns or throws.
	 *
	 * @param url the location to read; must not be null
	 * @throws IOException if the connection cannot be opened or read
	 * @throws NullPointerException if {@code url} is null
	 */
	public Webpage(URL url) throws IOException {
		if (url == null) {
			throw new NullPointerException("url");
		}

		URLConnection c = url.openConnection();
		BufferedInputStream in = new BufferedInputStream(c.getInputStream());
		StringBuilder text = new StringBuilder();
		try {
			// Headers are available once the stream has been obtained.
			Reader r = new InputStreamReader(in, charsetFor(c.getContentType()));
			char[] buf = new char[BUFFER_SIZE];
			int n;
			while ((n = r.read(buf)) != -1) {
				text.append(buf, 0, n);
			}
		} finally {
			// Closing the stream releases the underlying connection even when
			// reading fails part-way through.
			in.close();
		}

		html = text.toString();
	}

	/**
	 * Picks the charset to decode a response with.
	 *
	 * @param contentType the Content-Type value, e.g.
	 *        {@code text/html; charset=ISO-8859-1}; may be null
	 * @return the declared charset when present and supported, otherwise UTF-8
	 */
	private static Charset charsetFor(String contentType) {
		if (contentType != null) {
			String[] params = contentType.split(";");
			// params[0] is the media type itself; parameters follow it.
			for (int k = 1; k < params.length; k++) {
				String p = params[k].trim();
				if (p.regionMatches(true, 0, "charset=", 0, 8)) {
					String name = p.substring(8).trim();
					// The value may be a quoted string: charset="utf-8"
					if (name.length() >= 2 && name.charAt(0) == '"'
							&& name.charAt(name.length() - 1) == '"') {
						name = name.substring(1, name.length() - 1);
					}
					try {
						return Charset.forName(name);
					} catch (IllegalArgumentException ignored) {
						// Illegal or unsupported charset name: use the default
						// rather than failing the whole download.
						break;
					}
				}
			}
		}
		return Charset.forName(DEFAULT_CHARSET);
	}

	/**
	 * Returns the html of this page as a String.
	 *
	 * @return the downloaded text; empty if the resource had no content, never null
	 */
	public String getHtml() {
		return html;
	}
}

OCaml

(* Applies Http_client.Convenience.http_get to the URL given as a string.
   "http://..." is a placeholder: substitute the address to fetch.
   NOTE(review): going by this page's title the result is presumably the
   text of the URL; confirm against the Http_client documentation. *)
Http_client.Convenience.http_get "http://..."

Perl

use LWP::Simple qw(get);

# Fetch the document; get() returns undef if the request fails,
# so check defined($content) before using it.
my $content = get 'http://example.com';

Python

 from urllib2 import urlopen
 print urlopen('http://example.com').read()

PHP

<?php
// Fetch the document as a string. file_get_contents() returns false on
// failure, and reading URLs this way requires allow_url_fopen to be enabled.
$text = file_get_contents('http://example.com');

Tcl

package require http

# Fetch the page, print its body, then release the state held for the request.
set token [http::geturl http://www.codecodex.com/wiki/Main_Page]
puts [http::data $token]
http::cleanup $token