Retrieve the text of a URL

From CodeCodex

Implementations[edit]

Java[edit]

This simple class, Webpage, downloads the HTML of a page from a URL and makes it available as a String.

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Represents a webpage: the text of an HTTP(S) resource, downloaded once at
 * construction time. Instances are immutable once constructed.
 *
 * @author Julius Schorzman
 * (c)2005 - provided as GPL
 */
public class Webpage {
	
	/** Size, in chars, of the chunks used to copy the response body. */
	private static final int BUFFER_SIZE = 8192;
	
	/** The complete decoded response body. */
	private final String html;
	
	/**
	 * Downloads html from a webpage.
	 * <p>
	 * The body is decoded with the charset named in the response's
	 * Content-Type header, falling back to UTF-8 when the header is missing
	 * or names a charset this JVM does not know. The response stream is
	 * always closed, whether or not the download succeeds.
	 *
	 * @param url the http or https address to fetch; must not be null
	 * @throws IOException if the connection cannot be opened or the body
	 *         cannot be read
	 * @throws ClassCastException if the URL's protocol is not http/https
	 */
	public Webpage(URL url) throws IOException {
		HttpURLConnection c = (HttpURLConnection) url.openConnection();
		StringBuilder text = new StringBuilder();
		
		// try-with-resources closes the reader and the underlying stream even
		// when reading fails, so the connection's resources are not leaked.
		try (InputStream in = c.getInputStream();
				Reader r = new InputStreamReader(in, charsetOf(c.getContentType()))) {
			char[] buf = new char[BUFFER_SIZE];
			int n;
			while ((n = r.read(buf)) != -1) {
				text.append(buf, 0, n);
			}
		}
		
		html = text.toString();
	}
	
	/**
	 * Returns the html of this page as a String.
	 * @return The html (empty if the response had no body)
	 */
	public String getHtml() {
		return html;
	}
	
	/**
	 * Picks the charset to decode a response with.
	 *
	 * @param contentType the raw Content-Type header value, may be null
	 * @return the charset named by the header's {@code charset} parameter, or
	 *         UTF-8 when there is none or it is not usable
	 */
	private static Charset charsetOf(String contentType) {
		final String key = "charset=";
		if (contentType != null) {
			for (String part : contentType.split(";")) {
				String param = part.trim();
				if (!param.regionMatches(true, 0, key, 0, key.length())) {
					continue;
				}
				String name = param.substring(key.length()).trim();
				// The value may be a quoted string, e.g. charset="utf-8".
				if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
					name = name.substring(1, name.length() - 1);
				}
				try {
					return Charset.forName(name);
				} catch (IllegalArgumentException ignored) {
					// Malformed or unsupported charset name: use the fallback
					// rather than failing the whole download.
					break;
				}
			}
		}
		return StandardCharsets.UTF_8;
	}
}

OCaml[edit]

(* Performs an HTTP GET on the given URL; "http://..." is a placeholder to be
   replaced with a real address.
   NOTE(review): Http_client.Convenience presumably comes from Ocamlnet's
   netclient library, with http_get returning the body as a string - confirm. *)
Http_client.Convenience.http_get "http://..."

Perl[edit]

# Import only get() from LWP::Simple and store what it returns for the URL.
# NOTE(review): get() is expected to return undef on failure - confirm, and
# check $content before using it.
use LWP::Simple qw(get);
my $content = get 'http://example.com';

Python[edit]

 # Python 2 only: relies on the urllib2 module and the print statement.
 # Opens the URL, reads the whole response body and prints it.
 from urllib2 import urlopen
 print urlopen('http://example.com').read()

PHP[edit]

// Read the entire response for the URL into $text.
// NOTE(review): presumably needs allow_url_fopen enabled and yields false on
// failure - confirm before relying on $text.
$text = file_get_contents('http://example.com');

Tcl[edit]

# Load the http package, then fetch the page and print it: the result of
# http::geturl is handed to http::data, and puts writes that value to stdout.
package require http
puts [http::data [http::geturl http://www.codecodex.com/wiki/Main_Page]]