This blog entry is a reminder to myself of how to use regular expressions in different languages.
Java
// Compile the pattern and bind it to the input in one chain;
// the parentheses define capture group 1.
Matcher matcher = Pattern.compile("Hello, (.+)").matcher("Hello, Franz");
// matches() succeeds only when the entire input matches the pattern.
if (matcher.matches()) {
    System.out.println(matcher.group(0)); // This will print "Hello, Franz"
    System.out.println(matcher.group(1)); // This will print "Franz"
}
Rust
let text = "Hello, Franz";
// unwrap() is acceptable here because the pattern is a fixed literal.
let re = Regex::new("Hello, (.+)").unwrap();
// captures() yields None when the text does not match, so `if let`
// simply skips the block in that case.
if let Some(caps) = re.captures(text) {
    println!("{}", &caps[0]); // This will print "Hello, Franz"
    println!("{}", &caps[1]); // This will print "Franz"
}
Python
# Use a raw string so any backslashes in the pattern reach the regex engine untouched.
pattern = re.compile(r"Hello, (.+)")
# search() looks for the first match anywhere in the string and returns None if there is none.
matcher = pattern.search("Hello, Franz")
if matcher is not None:
    print(matcher.group(0))  # This will print "Hello, Franz"
    print(matcher.group(1))  # This will print "Franz"
JavaScript
const str = 'Hello, Franz';
// A regular expression literal is compiled when the script is loaded
const pattern1 = /Hello, (.+)/;
// The RegExp constructor compiles the pattern at runtime (useful for dynamic patterns)
const pattern2 = new RegExp('Hello, (.+)');
// Without the g flag, match() behaves like exec(): it returns the first match
// together with its capture groups, or null when nothing matches.
// With the g flag, match() returns every full match but drops the capture groups.
const matches1 = str.match(pattern1);
if (matches1) {
  console.log(matches1[0]); // This will print "Hello, Franz"
  console.log(matches1[1]); // This will print "Franz"
}
grep
#!/bin/bash
input="12345678 is my phone number"
# -o prints only the matched text; -E enables extended regular expressions
grep -oE "[[:digit:]]+" <<< "$input" # This will print "12345678"
# GNU grep has -P option (Perl-compatible syntax such as \d)
grep -oP "\d+" <<< "$input" # This will print "12345678"
sed
#!/bin/bash
# -n suppresses the automatic print of each line, so only the explicit p flag
# prints (otherwise the result would appear twice)
# -E to use extended regular expressions
sed -nE 's/Hello, (.+)/\1/p' <<< 'Hello, Franz' # This will print "Franz"
gawk
#!/bin/bash
# $0 is the whole input record, before field splitting
# The three-argument match() is a gawk extension that stores capture groups in the array
gawk 'match($0, /Hello, (.+)/, groups) { print groups[1] }' <<< 'Hello, Franz' # This will print "Franz"
Top comments (0)