4 * Copyright(c) 2008 olyutorskii
\r
5 * $Id: RegexPattern.java 904 2009-11-17 11:14:10Z olyutorskii $
\r
8 package jp.sourceforge.jindolf;
\r
10 import java.util.regex.Pattern;
\r
11 import java.util.regex.PatternSyntaxException;
\r
12 import jp.sourceforge.jindolf.json.JsBoolean;
\r
13 import jp.sourceforge.jindolf.json.JsObject;
\r
14 import jp.sourceforge.jindolf.json.JsPair;
\r
15 import jp.sourceforge.jindolf.json.JsString;
\r
16 import jp.sourceforge.jindolf.json.JsValue;
\r
21 public class RegexPattern{
\r
24 public static final int IGNORECASEFLAG =
\r
25 0x00000000 | Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
\r
26 private static final String REGEX_DELIM = "[\\s\u3000]+"; // 空白(全角含)
\r
27 private static final String REGEX_CHAR = ".?+*\\$(|)[]{}^-&";
\r
31 * 正規表現とまぎらわしい字を含むか判定する。
\r
33 * @return 紛らわしい字を含むならtrue
\r
35 public static boolean hasRegexChar(CharSequence seq){
\r
36 int length = seq.length();
\r
37 for(int pt = 0; pt < length; pt++){
\r
38 char ch = seq.charAt(pt);
\r
39 if(REGEX_CHAR.indexOf(ch) >= 0) return true;
\r
45 * 任意の文字列を必要に応じて正規表現シーケンス化する。
\r
47 * @return 引数と同じ内容の正規表現。必要がなければ引数そのまま
\r
49 public static String quote(String text){
\r
50 if(hasRegexChar(text)){
\r
51 return Pattern.quote(text);
\r
59 * @return JSON Object
\r
61 public static JsObject encodeJson(RegexPattern regex){
\r
62 JsObject result = new JsObject();
\r
64 int regexFlag = regex.getRegexFlag();
\r
65 boolean flagDotall = (regexFlag & Pattern.DOTALL) != 0;
\r
66 boolean flagMultiline = (regexFlag & Pattern.MULTILINE) != 0;
\r
67 boolean flagIgnoreCase = (regexFlag & IGNORECASEFLAG) != 0;
\r
69 JsPair source = new JsPair("source", regex.getEditSource());
\r
70 JsPair isRegex = new JsPair("isRegex", regex.isRegex());
\r
71 JsPair dotall = new JsPair("dotall", flagDotall);
\r
72 JsPair multiline = new JsPair("multiline", flagMultiline);
\r
73 JsPair ignorecase = new JsPair("ignorecase", flagIgnoreCase);
\r
74 JsPair comment = new JsPair("comment", regex.getComment());
\r
76 result.putPair(source);
\r
77 result.putPair(isRegex);
\r
78 result.putPair(dotall);
\r
79 result.putPair(multiline);
\r
80 result.putPair(ignorecase);
\r
81 result.putPair(comment);
\r
88 * @param object JSON Object
\r
// Builds a RegexPattern from a JSON object by reading the keys "source",
// "isRegex", "dotall", "multiline", "ignorecase" and "comment".
// NOTE(review): this listing omits several lines of the method (local
// declarations, else-branches, the return statement). The fallback applied
// when a key is absent or has an unexpected type is therefore not visible
// here - confirm against the complete file before changing this method.
91 public static RegexPattern decodeJson(JsObject object){
\r
// Pattern text: read only when the stored value is a JSON string.
95 value = object.getValue("source");
\r
96 if(value instanceof JsString){
\r
97 source = ((JsString)value).toRawString();
\r
// Regex-vs-literal switch: read only when the stored value is a JSON boolean.
103 value = object.getValue("isRegex");
\r
104 if(value instanceof JsBoolean){
\r
105 isRegex = ((JsBoolean)value).booleanValue();
\r
// Flags start empty; each JSON boolean that is true ORs in its Pattern flag.
110 int regexFlag = 0x00000000;
\r
111 value = object.getValue("dotall");
\r
112 if(value instanceof JsBoolean){
\r
113 if(((JsBoolean)value).isTrue()){
\r
114 regexFlag |= Pattern.DOTALL;
\r
117 value = object.getValue("multiline");
\r
118 if(value instanceof JsBoolean){
\r
119 if(((JsBoolean)value).isTrue()){
\r
120 regexFlag |= Pattern.MULTILINE;
\r
// "ignorecase" sets the whole IGNORECASEFLAG combination.
123 value = object.getValue("ignorecase");
\r
124 if(value instanceof JsBoolean){
\r
125 if(((JsBoolean)value).isTrue()){
\r
126 regexFlag |= IGNORECASEFLAG;
\r
// Comment text: read only when the stored value is a JSON string.
131 value = object.getValue("comment");
\r
132 if(value instanceof JsString){
\r
133 comment = ((JsString)value).toRawString();
\r
// The four-argument constructor is declared to throw PatternSyntaxException,
// so a malformed "source" regex propagates out of this method.
138 RegexPattern result =
\r
139 new RegexPattern(source, isRegex, regexFlag, comment);
\r
145 private final String editSource;
\r
146 private final boolean isRegex;
\r
147 private final Pattern pattern;
\r
148 private final String comment;
\r
153 * @param editSource リテラル文字列または正規表現
\r
154 * @param isRegex 指定文字列が正規表現ならtrue。リテラルならfalse
\r
155 * @param flag 正規表現フラグ
\r
156 * @param comment コメント
\r
157 * @throws java.util.regex.PatternSyntaxException 正規表現がおかしい
\r
159 public RegexPattern(String editSource,
\r
163 throws PatternSyntaxException{
\r
165 if(editSource == null) throw new NullPointerException();
\r
167 this.isRegex = isRegex;
\r
168 if(comment != null) this.comment = comment;
\r
169 else this.comment = "";
\r
173 this.editSource = editSource;
\r
174 regexExpr = this.editSource;
\r
176 String newSource = "";
\r
179 String[] tokens = editSource.split(REGEX_DELIM);
\r
180 for(String token : tokens){
\r
181 if(token == null || token.length() <= 0) continue;
\r
183 if(newSource.length() <= 0) newSource = token;
\r
184 else newSource += " " + token;
\r
186 String quoted = "(?:" + quote(token) + ")";
\r
187 if(regexExpr.length() <= 0) regexExpr = quoted;
\r
188 else regexExpr += "|" + quoted;
\r
191 this.editSource = newSource;
\r
194 this.pattern = Pattern.compile(regexExpr, flag);
\r
/**
 * Builds a pattern without an explicit comment by delegating to the
 * four-argument constructor.
 * NOTE(review): the comment passed on is a single space, whereas a null
 * comment is stored as an empty string - confirm this is intended.
 * @param editSource literal text or regular expression
 * @param isRegex true if the given text is a regular expression, false if literal
 * @param flag regular-expression flags
 * @throws java.util.regex.PatternSyntaxException the regular expression is malformed
 */
public RegexPattern(String editSource,
                    boolean isRegex,
                    int flag)
        throws PatternSyntaxException{
    this(editSource, isRegex, flag, " ");
}
\r
219 public String getEditSource(){
\r
220 return this.editSource;
\r
227 public String getComment(){
\r
228 return this.comment;
\r
232 * 元の入力文字列が正規表現か否か返す。
\r
233 * @return 正規表現ならtrue
\r
235 public boolean isRegex(){
\r
236 return this.isRegex;
\r
242 * @see java.util.regex.Pattern#flags()
\r
244 public int getRegexFlag(){
\r
245 return this.pattern.flags();
\r
249 * コンパイルされた正規表現形式を返す。
\r
250 * @return コンパイルされた正規表現形式
\r
252 public Pattern getPattern(){
\r
253 return this.pattern;
\r
258 * @return {@inheritDoc}
\r
261 public String toString(){
\r
262 return this.editSource;
\r
267 * @param obj {@inheritDoc}
\r
268 * @return {@inheritDoc}
\r
271 public boolean equals(Object obj){
\r
275 if( ! (obj instanceof RegexPattern) ){
\r
278 RegexPattern other = (RegexPattern) obj;
\r
280 String thisPattern = this.pattern.pattern();
\r
281 String otherPattern = other.pattern.pattern();
\r
283 if( ! thisPattern.equals(otherPattern) ) return false;
\r
285 if(this.pattern.flags() != other.pattern.flags()) return false;
\r
292 * @return {@inheritDoc}
\r
295 public int hashCode(){
\r
296 int hash = this.pattern.pattern().hashCode();
\r
297 hash ^= this.pattern.flags();
\r