// Recovered from the gitweb blobdiff of src/org/ibex/util/RobustDateParser.java
// (repository org.ibex.util.git on git.megacz.com, new file, blob 3340dd8).
//
// NOTE(review): the scraped text had lost three spans (everything between a
// '<' followed by an identifier and the next '>').  The places where code had
// to be reconstructed are marked with NOTE(review) below and must be checked
// against the upstream file before this is relied upon.

package org.ibex.util;
import java.util.*;
import java.io.*;

// FEATURE: cope with stuff like "next tuesday" or "in three hours"

/**
 *  Prototype parser for free-form dates; mostly needs to handle the
 *  RFC2822 Date field.
 *
 *  The input is split into runs of letters, digits and symbols
 *  (see {@link #tokenize}); the date fields are then picked out of the
 *  token stream by simple positional heuristics (see {@link #parseDate_}).
 */
public class RobustDateParser {

    // CHARCLASS_OTHER is declared but never returned by charClass().
    private static final int CHARCLASS_OTHER = 0;
    private static final int CHARCLASS_WS    = 1;
    private static final int CHARCLASS_ALPHA = 2;
    private static final int CHARCLASS_NUM   = 3;
    private static final int CHARCLASS_SYM   = 4;

    /** lower-case three-letter month prefixes; index + 1 is the month number */
    private static final String[] MONTH_PREFIXES = {
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec"
    };

    /**
     *  Classifies a character as whitespace, letter, digit or symbol.
     *  @return one of the CHARCLASS_* constants; anything that is not
     *          whitespace, a letter or a digit counts as a symbol
     */
    public static int charClass(char c) {
        if (Character.isWhitespace(c)) return CHARCLASS_WS;
        if (Character.isLetter(c))     return CHARCLASS_ALPHA;
        if (Character.isDigit(c))      return CHARCLASS_NUM;
        return CHARCLASS_SYM;
    }

    /**
     *  Maps a month name to its number using only the first three
     *  characters (after trimming), case-insensitively.
     *  @param s a month name such as "Jan" or "january"
     *  @return 1 (January) through 12 (December), or -1 if s is null,
     *          shorter than three characters, or not a month name
     */
    public static int parseMonth(String s) {
        if (s == null) return -1;
        s = s.trim();
        // the original called substring(0, 3) unconditionally and threw
        // StringIndexOutOfBoundsException for short tokens such as "at"
        if (s.length() < 3) return -1;
        String prefix = s.substring(0, 3).toLowerCase(Locale.ENGLISH);
        for (int i = 0; i < MONTH_PREFIXES.length; i++)
            if (MONTH_PREFIXES[i].equals(prefix)) return i + 1;
        return -1;
    }

    /**
     *  Splits s into maximal runs of characters that share a character
     *  class; whitespace separates tokens and is never part of one.
     *  Adjacent symbols form a single token (e.g. "::").
     *  @param s the text to split; must not be null
     *  @return the tokens in order of appearance (possibly empty)
     */
    public static String[] tokenize(String s) {
        Vec components = new Vec();
        StringBuilder tok = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            int cls = charClass(c);
            // a change of character class ends the current token
            if (tok.length() > 0 && cls != charClass(tok.charAt(tok.length() - 1))) {
                components.add(tok.toString());
                tok.setLength(0);
            }
            if (cls != CHARCLASS_WS) tok.append(c);
        }
        if (tok.length() > 0)
            components.add(tok.toString());
        String[] ret = new String[components.size()];
        components.toArray(ret);
        return ret;
    }

    /**
     *  Parses a free-form date, never throwing.
     *  @return the parsed date, or null if s is null or parsing failed
     *          (the failure is reported through Log.error)
     */
    public static Date parseDate(String s) {
        if (s == null) return null;
        try {
            return parseDate_(s);
        } catch (Exception e) {
            Log.error(RobustDateParser.class, e);
            return null;
        }
    }

    /**
     *  Does the actual work for {@link #parseDate}; failures surface as
     *  runtime exceptions (for instance NumberFormatException from
     *  Integer.parseInt, or IllegalArgumentException when no complete
     *  calendar date was found).
     *
     *  Heuristics applied to the token stream:
     *  a month name sets the month, and a one- or two-digit number directly
     *  before it is the day; a three-letter all-caps word is a time zone id;
     *  a four-digit number strictly between 1960 and 2100 is the year; any
     *  other four-digit number preceded by "+" or "-" is a zone offset in
     *  hhmm form; numbers adjacent to ":" are the time of day.
     */
    public static Date parseDate_(String s) {
        String[] toks = tokenize(s);
        int[] cclass = new int[toks.length];
        for (int i = 0; i < toks.length; i++)
            cclass[i] = charClass(toks[i].charAt(0));

        // NOTE(review): these declarations were lost from the scraped text.
        // The surviving code tests "sec == -1" and "tz == null"; -1 is
        // assumed as the "not seen" marker for the other fields as well.
        int year = -1, month = -1, day = -1;
        int hour = -1, min = -1, sec = -1;
        TimeZone tz = null;

        for (int i = 0; i < toks.length; i++) {

            if (cclass[i] == CHARCLASS_ALPHA) {
                // NOTE(review): the condition guarding the month branch was
                // lost; "token is a month name" is the assumed test.
                int m = parseMonth(toks[i]);
                if (m != -1) {
                    month = m;
                    if (i > 0 && cclass[i-1] == CHARCLASS_NUM && toks[i-1].length() <= 2)
                        day = Integer.parseInt(toks[i-1]);
                } else if (tz == null
                           && toks[i].length() == 3
                           && toks[i].toUpperCase(Locale.ENGLISH).equals(toks[i])) {
                    tz = TimeZone.getTimeZone(toks[i]);
                }
            }

            if (cclass[i] == CHARCLASS_NUM) {
                if (toks[i].length() == 4) {
                    int y = Integer.parseInt(toks[i]);
                    if (y > 1960 && y < 2100) {
                        year = y;
                    } else if (i > 0 && (toks[i-1].equals("+") || toks[i-1].equals("-"))) {
                        // RFC 2822 zone: sign followed by hhmm.  The original
                        // only accepted whole-hour offsets and silently
                        // ignored zones such as +0530.
                        int hh = y / 100;
                        int mm = y % 100;
                        if (mm < 60) {
                            int minutes = hh * 60 + mm;
                            if (toks[i-1].equals("-")) minutes = -minutes;
                            tz = new SimpleTimeZone(minutes * 60 * 1000, toks[i-1] + toks[i]);
                        }
                    }
                } else if (i < toks.length - 1 && toks[i+1].equals(":")) {
                    // NOTE(review): this branch was lost from the scraped
                    // text; a number followed by ":" is assumed to be the
                    // hour, then the minute.
                    if (hour == -1)     hour = Integer.parseInt(toks[i]);
                    else if (min == -1) min  = Integer.parseInt(toks[i]);
                } else if (i > 0 && toks[i-1].equals(":")) {
                    // last number of a time: the minute when the time has
                    // only two parts ("10:30"), otherwise the second
                    if (hour != -1 && min == -1) min = Integer.parseInt(toks[i]);
                    else if (sec == -1)          sec = Integer.parseInt(toks[i]);
                }
            }
        }

        // NOTE(review): handling of fields that were never seen is not
        // visible in the scraped text.  A missing calendar date is treated
        // as a parse failure (parseDate() turns it into null); a missing
        // time-of-day component is taken as zero.
        if (year == -1 || month == -1 || day == -1)
            throw new IllegalArgumentException("no complete date found in \"" + s + "\"");
        if (hour == -1) hour = 0;
        if (min == -1)  min = 0;
        if (sec == -1)  sec = 0;

        GregorianCalendar gc = new GregorianCalendar();
        if (tz != null) gc.setTimeZone(tz);
        gc.set(year, month-1, day, hour, min, sec);
        // set(y,m,d,h,m,s) keeps the millisecond of "now"; clear it so that
        // equal inputs give equal Dates
        gc.set(Calendar.MILLISECOND, 0);
        Date d = gc.getTime();

        /*
        StringBuffer sb = new StringBuffer();
        sb.append("components: ");
        for(String t : toks)
            sb.append("\""+t+"\" ");
        Log.warn("RobustDateParser",
                 sb.toString() + "\n" +
                 "components: " + year+":"+month+":"+day+":"+hour+":"+min+":"+sec + "\n" +
                 "parsed as:  " + d);
        */
        return d;
    }

}