java语言 百分网手机站

如何使用Java实现AC自动机全文检索实例

时间:2020-10-23 18:29:45 java语言 我要投稿

如何使用Java实现AC自动机全文检索实例

  导语:如何使用Java实现AC自动机全文检索,下面是小编给大家推荐的代码实现过程,大家可以参考阅读,更多详情请关注应届毕业生考试网。

  第一步,构建Trie树,定义Node类型:

  /**

  * Created by zhaoyy on 2017/2/7.

  */

  interface Node {

  char value();

  boolean exists();

  boolean isRoot();

  Node parent();

  Node childOf(char c);

  Node fail();

  void setFail(Node node);

  void setExists(boolean exists);

  void add(Node child);

  List<Node> children();

  }

  第二步,实现两种Node,如果词汇全是可打印的'ASCII字符,就采用AsciiNode,否则(比如包含汉字),使用基于hash表的MapNode;这两种Node均集成自AbstractNode:

  /**

  * Created by zhaoyy on 2017/2/8.

  */

  abstract class AbstractNode implements Node {

  private static final char EMPTY = '\0';

  private final char value;

  private final Node parent;

  private boolean exists;

  private Node fail;

  public AbstractNode(Node parent, char value) {

  this.parent = parent;

  this.value = value;

  this.exists = false;

  this.fail = null;

  }

  public AbstractNode() {

  this(null, EMPTY);

  }

  private static String tab(int n) {

  StringBuilder builder = new StringBuilder();

  for (int i = 0; i < n; i++) {

  builder.append('\t');

  }

  return builder.toString();

  }

  private static String toString(Node node, int depth) {

  StringBuilder builder = new StringBuilder();

  String tab = tab(depth);

  Node fail = node.fail();

  Node parent = node.parent();

  builder

  .append(tab)

  .append('<')

  .append(node.value())

  .append(" exists=\"")

  .append(node.exists())

  .append('"')

  .append(" parent=\"")

  .append(parent == null ? "null" : parent.value())

  .append('"')

  .append(" fail=\"")

  .append(fail == null ? "null" : fail.value())

  .append("\">\r\n");

  for (Node child : node.children())

  builder.append(toString(child, depth + 1));

  builder

  .append(tab)

  .append("</")

  .append(node.value())

  .append(">\r\n");

  return builder.toString();

  }

  @Override

  public char value() {

  return value;

  }

  @Override

  public boolean exists() {

  return exists;

  }

  @Override

  public boolean isRoot() {

  return value == EMPTY;

  }

  @Override

  public Node parent() {

  return parent;

  }

  @Override

  public Node fail() {

  return fail;

  }

  @Override

  public void setFail(Node node) {

  this.fail = node;

  }

  @Override

  public void setExists(boolean exists) {

  this.exists = exists;

  }

  @Override

  public String toString() {

  return toString(this, 0);

  }

  }

  /////////////////////////////////////////////////////////////////////////////////////////////

  /**

  * Created by zhaoyy on 2017/2/8.

  */

  final class AsciiNode extends AbstractNode implements Node {

  private static final char FROM = 32;

  private static final char TO = 126;

  private final Node[] children;

  public AsciiNode() {

  super();

  this.children = new Node[TO - FROM + 1];

  }

  public AsciiNode(Node parent, char value) {

  super(parent, value);

  this.children = new Node[TO - FROM + 1];

  }

  @Override

  public Node childOf(char c) {

  if (c >= FROM && c <= TO)

  return children[(int) c - FROM];

  else return null;

  }

  @Override

  public void add(Node child) {

  int index = (int) child.value();

  children[index - FROM] = child;

  }

  @Override

  public List<Node> children() {

  List<Node> nodes = new ArrayList<Node>();

  for (Node child : children)

  if (child != null)

  nodes.add(child);

  return nodes;

  }

  }

  //////////////////////////////////////////////////////////////////////////////////////////////

  /**

  * Created by zhaoyy on 2017/2/8.

  */

  final class MapNode extends AbstractNode implements Node {

  private final Map<Character, Node> children;

  public MapNode() {

  super();

  this.children = new HashMap<Character, Node>();

  }

  public MapNode(Node parent, char value) {

  super(parent, value);

  this.children = new HashMap<Character, Node>();

  }

  @Override

  public Node childOf(char c) {

  return children.get(c);

  }

  @Override

  public void add(Node child) {

  children.put(child.value(), child);

  }

  @Override

  public List<Node> children() {

  List<Node> nodes = new ArrayList<Node>();

  nodes.addAll(children.values());

  return nodes;

  }

  }

  第三步,

  首先定义一个Node构造器:

  /**

  * Created by zhaoyy on 2017/2/8.

  */

  public interface NodeMaker {

  Node make(Node parent, char value);

  Node makeRoot();

  }

  然后构建AC自动机,实现创建及查找方法:

  /**

  * Created by zhaoyy on 2017/2/7.

  */

  public final class WordTable {

  private final Node root;

  private WordTable(Collection<? extends CharSequence> words, NodeMaker maker) {

  Node root = buildTrie(words, maker);

  setFailNode(root);

  this.root = root;

  }

  public static WordTable compile(Collection<? extends CharSequence> words) {

  if (words == null || words.isEmpty())

  throw new IllegalArgumentException();

  final NodeMaker maker;

  if (isAscii(words))

  maker = new NodeMaker() {

  @Override

  public Node make(Node parent, char value) {