Detect non-ASCII character in a String

From CodeCodex

Implementations[edit]

Java[edit]

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharacterCodingException;

/**
 * Demonstrates how to detect non-ASCII characters in a {@code String}.
 *
 * <p>The check works on the characters of the string itself rather than on
 * bytes obtained through the platform default charset, so the result is the
 * same on every machine.
 */
public class TestAscii {

  /**
   * Verifies that {@code text} consists solely of US-ASCII characters.
   *
   * @param text the string to check
   * @throws CharacterCodingException if {@code text} contains a character that
   *     cannot be encoded as US-ASCII
   */
  static void requireAscii(String text) throws CharacterCodingException {
    // A freshly created encoder reports unencodable input instead of silently
    // replacing it, so encode() throws on the first non-ASCII character.
    Charset.forName("US-ASCII").newEncoder().encode(CharBuffer.wrap(text));
  }

  /**
   * Tells whether {@code text} consists solely of US-ASCII characters.
   *
   * @param text the string to check
   * @return {@code true} if every character is US-ASCII (an empty string
   *     qualifies), {@code false} otherwise
   */
  static boolean isAscii(String text) {
    try {
      requireAscii(text);
      return true;
    } catch (CharacterCodingException e) {
      // The exception is the "not ASCII" signal here, not an error condition.
      return false;
    }
  }

  /**
   * Checks a sample string and prints the verdict.
   *
   * @param args ignored
   * @throws Exception wrapping the {@link CharacterCodingException} when the
   *     sample string contains a non-ASCII character
   */
  public static void main (String args[])
    throws Exception {
     // this String makes the check fail, it contains an accented letter
     String test = "Réal";
     // this String is OK
     //String test = "Real";

     System.out.println("Test string : " + test);

     try {
       requireAscii(test);
     }
     catch(CharacterCodingException e) {
       System.out.println("only regular ASCII characters please!");
       // interrupt the processing
       throw new Exception(e);
     }
     System.out.println("Ok, it's ASCII only!");
  }
}

Another way is to use a regular expression; see this JavaScript HowTo for a hint!


To simply strip any non-ASCII characters from a string:

/**
 * Shows how to drop every non-ASCII character from a string with a single
 * regular-expression replacement.
 */
public class Test {
    /**
     * Strips the accented letters from a sample string and prints what is left.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String accented = "eéaà";
        // [^\p{ASCII}] matches any character outside the ASCII class;
        // replacing each match with the empty string removes it.
        System.out.println(accented.replaceAll("[^\\p{ASCII}]", ""));
        // prints: ea
    }
}

Seed7[edit]

$ include "seed7_05.s7i";

# Tells whether every character of 'stri' is an ASCII character,
# i.e. has an ordinal number of at most 127.
# Returns TRUE for an empty string.
const func boolean: isAscii (in string: stri) is func
  result
    var boolean: isAscii is TRUE;
  local
    var integer: index is 1;
  begin
    # Use <= so that the last character is examined as well.
    # The scan stops at the first non-ASCII character.
    while index <= length(stri) and isAscii do
      # Compare ordinal numbers to avoid a numeric character escape.
      if ord(stri[index]) > 127 then
        isAscii := FALSE;
      end if;
      incr(index);
    end while;
  end func;

# Runs isAscii on one pure ASCII sample and on one sample with an
# accented letter, writing each result on its own line.
const proc: main is func
  local
    const array string: samples is [] ("ASDFasdf123+-*", "Réal");
    var string: sample is "";
  begin
    for sample range samples do
      writeln(isAscii(sample));
    end for;
  end func;