Java实现AC自动机全文检索示例
发布时间 - 2026-01-10 22:56:16 点击率:次
第一步,构建Trie树,定义Node类型:
/**
 * One node of the trie that backs the automaton built by {@code WordTable}.
 * Each node carries a single character, a link to its parent, a set of
 * children keyed by character, a "word ends here" flag and a failure link.
 *
 * Created by zhaoyy on 2017/2/7.
 */
interface Node {

    // ---- identity and position -------------------------------------------

    /** Returns the character stored in this node. */
    char value();

    /** Returns the node this one hangs under, or {@code null} when it has none. */
    Node parent();

    /** Tells whether this node is the root of the trie. */
    boolean isRoot();

    // ---- children --------------------------------------------------------

    /**
     * Looks up the direct child labelled with {@code c}.
     *
     * @param c character to follow
     * @return the matching child; the implementations in this file return
     *         {@code null} when no such child exists
     */
    Node childOf(char c);

    /** Registers {@code child} under the character it carries. */
    void add(Node child);

    /** Returns the direct children of this node as a list. */
    List<Node> children();

    // ---- end-of-word flag --------------------------------------------------

    /** Returns the flag that marks this node as the last character of a word. */
    boolean exists();

    /** Sets the end-of-word flag. */
    void setExists(boolean exists);

    // ---- failure link ------------------------------------------------------

    /** Returns the failure link, i.e. the node to fall back to on a mismatch. */
    Node fail();

    /** Replaces the failure link. */
    void setFail(Node node);
}
第二步,实现两种Node:如果词汇全是可打印的ASCII字符,就采用AsciiNode;否则(比如包含汉字),使用基于hash表的MapNode。这两种Node均继承自AbstractNode:
/**
 * Shared state and behaviour for {@link Node} implementations: the stored
 * character, the parent link, the end-of-word flag and the failure link.
 * Subclasses only have to supply the child storage ({@code childOf},
 * {@code add}, {@code children}).
 *
 * Created by zhaoyy on 2017/2/8.
 */
abstract class AbstractNode implements Node {
    /** Placeholder character carried by a node created without a value. */
    private static final char EMPTY = '\0';

    private final char value;
    private final Node parent;
    private boolean exists;
    private Node fail;

    /**
     * Creates a node that carries {@code value} and hangs under {@code parent}.
     * The end-of-word flag starts as {@code false} and the failure link as
     * {@code null}.
     *
     * @param parent owning node, or {@code null} for a root
     * @param value  character stored in the node
     */
    public AbstractNode(Node parent, char value) {
        this.parent = parent;
        this.value = value;
        this.exists = false;
        this.fail = null;
    }

    /** Creates a root node: no parent, placeholder character. */
    public AbstractNode() {
        this(null, EMPTY);
    }

    /** Returns a string made of {@code n} tab characters. */
    private static String tab(int n) {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < n; i++) {
            builder.append('\t');
        }
        return builder.toString();
    }

    /** Returns the character of {@code node} as text, or "null" when the node is absent. */
    private static String label(Node node) {
        return node == null ? "null" : String.valueOf(node.value());
    }

    /**
     * Appends the XML-like dump of {@code node} and its whole subtree to
     * {@code out}, indenting each level with one more tab.
     */
    private static void appendTree(StringBuilder out, Node node, int depth) {
        String indent = tab(depth);
        out.append(indent)
                .append('<')
                .append(node.value())
                .append(" exists=\"")
                .append(node.exists())
                .append('"')
                .append(" parent=\"")
                .append(label(node.parent()))
                .append('"')
                .append(" fail=\"")
                .append(label(node.fail()))
                .append("\">\r\n");
        for (Node child : node.children()) {
            appendTree(out, child, depth + 1);
        }
        out.append(indent)
                .append("</")
                .append(node.value())
                .append(">\r\n");
    }

    @Override
    public char value() {
        return value;
    }

    @Override
    public boolean exists() {
        return exists;
    }

    /**
     * A node is the root when it has no parent. The decision is deliberately
     * not based on the stored character: a dictionary word may legitimately
     * contain U+0000, and such a child must not be mistaken for the root.
     */
    @Override
    public boolean isRoot() {
        return parent == null;
    }

    @Override
    public Node parent() {
        return parent;
    }

    @Override
    public Node fail() {
        return fail;
    }

    @Override
    public void setFail(Node node) {
        this.fail = node;
    }

    @Override
    public void setExists(boolean exists) {
        this.exists = exists;
    }

    /** Dumps this node and its subtree in an indented, XML-like form. */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        appendTree(out, this, 0);
        return out.toString();
    }
}
/////////////////////////////////////////////////////////////////////////////////////////////
/**
 * {@link Node} whose children live in a fixed array indexed by character
 * code, covering the character codes 32 through 126 inclusive.
 *
 * Created by zhaoyy on 2017/2/8.
 */
final class AsciiNode extends AbstractNode implements Node {
    /** Lowest character code that has a slot. */
    private static final char FIRST = 32;
    /** Highest character code that has a slot. */
    private static final char LAST = 126;

    /** One slot per supported character; {@code null} means "no such child". */
    private final Node[] slots;

    /** Creates a root node with every slot empty. */
    public AsciiNode() {
        super();
        this.slots = new Node[LAST - FIRST + 1];
    }

    /** Creates a node carrying {@code value} under {@code parent}, with every slot empty. */
    public AsciiNode(Node parent, char value) {
        super(parent, value);
        this.slots = new Node[LAST - FIRST + 1];
    }

    /** Returns the child stored for {@code c}, or {@code null} when {@code c} is out of range or unused. */
    @Override
    public Node childOf(char c) {
        if (c < FIRST || c > LAST) {
            return null;
        }
        return slots[c - FIRST];
    }

    /** Stores {@code child} in the slot of its character, replacing any previous occupant. */
    @Override
    public void add(Node child) {
        slots[child.value() - FIRST] = child;
    }

    /** Returns the occupied slots, in ascending character order, as a fresh list. */
    @Override
    public List<Node> children() {
        List<Node> present = new ArrayList<Node>();
        for (int i = 0; i < slots.length; i++) {
            Node candidate = slots[i];
            if (candidate == null) {
                continue;
            }
            present.add(candidate);
        }
        return present;
    }
}
//////////////////////////////////////////////////////////////////////////////////////////////
/**
 * {@link Node} whose children live in a hash map keyed by character, so any
 * {@code char} value can label a child.
 *
 * Created by zhaoyy on 2017/2/8.
 */
final class MapNode extends AbstractNode implements Node {
    /** Children keyed by the character they carry. */
    private final Map<Character, Node> byChar = new HashMap<Character, Node>();

    /** Creates a root node without children. */
    public MapNode() {
        super();
    }

    /** Creates a node carrying {@code value} under {@code parent}, without children. */
    public MapNode(Node parent, char value) {
        super(parent, value);
    }

    /** Returns the child registered for {@code c}, or {@code null} when there is none. */
    @Override
    public Node childOf(char c) {
        return byChar.get(Character.valueOf(c));
    }

    /** Registers {@code child} under its own character, replacing any previous entry. */
    @Override
    public void add(Node child) {
        byChar.put(Character.valueOf(child.value()), child);
    }

    /** Returns a fresh list holding every registered child. */
    @Override
    public List<Node> children() {
        return new ArrayList<Node>(byChar.values());
    }
}
第三步,首先定义一个Node构造器(NodeMaker):
/**
 * Factory for the nodes of one trie, so the trie-building code does not
 * depend on a concrete {@link Node} class.
 *
 * Created by zhaoyy on 2017/2/8.
 */
public interface NodeMaker {

    /** Creates the root node of a new trie. */
    Node makeRoot();

    /**
     * Creates a non-root node.
     *
     * @param parent node the new node will hang under
     * @param value  character the new node carries
     * @return the new node; attaching it to {@code parent} is left to the caller
     */
    Node make(Node parent, char value);
}
然后构建AC自动机,实现创建及查找方法:
/**
 * A compiled dictionary that finds its words inside arbitrary text using an
 * Aho-Corasick automaton (a trie plus failure links).
 *
 * Instances are created with {@link #compile(Collection)}.
 *
 * Created by zhaoyy on 2017/2/7.
 */
public final class WordTable {
    /** Lowest character code the array-backed {@code AsciiNode} can index. */
    private static final int ASCII_FROM = 32;
    /** Highest character code the array-backed {@code AsciiNode} can index. */
    private static final int ASCII_TO = 126;

    private final Node root;

    private WordTable(Collection<? extends CharSequence> words, NodeMaker maker) {
        Node root = buildTrie(words, maker);
        setFailNode(root);
        this.root = root;
    }

    /**
     * Builds the automaton for {@code words}.
     *
     * Array-backed nodes are used when every character of every word lies in
     * the printable ASCII range 32..126; otherwise map-backed nodes are used.
     *
     * @param words dictionary words; must be neither {@code null} nor empty
     * @return the compiled table
     * @throws IllegalArgumentException if {@code words} is {@code null} or empty
     */
    public static WordTable compile(Collection<? extends CharSequence> words) {
        if (words == null || words.isEmpty())
            throw new IllegalArgumentException("words must not be null or empty");
        final NodeMaker maker;
        if (isAscii(words))
            maker = new NodeMaker() {
                @Override
                public Node make(Node parent, char value) {
                    return new AsciiNode(parent, value);
                }

                @Override
                public Node makeRoot() {
                    return new AsciiNode();
                }
            };
        else
            maker = new NodeMaker() {
                @Override
                public Node make(Node parent, char value) {
                    return new MapNode(parent, value);
                }

                @Override
                public Node makeRoot() {
                    return new MapNode();
                }
            };
        return new WordTable(words, maker);
    }

    /** Tells whether every character of every word lies in 32..126. */
    private static boolean isAscii(Collection<? extends CharSequence> words) {
        for (CharSequence word : words) {
            int len = word.length();
            for (int i = 0; i < len; i++) {
                int c = (int) word.charAt(i);
                if (c < ASCII_FROM || c > ASCII_TO)
                    return false;
            }
        }
        return true;
    }

    /**
     * Inserts every sequence into a fresh trie and flags the node of each
     * sequence's last character as an end of word. Empty sequences add nothing.
     */
    private static Node buildTrie(Collection<? extends CharSequence> sequences, NodeMaker maker) {
        Node root = maker.makeRoot();
        for (CharSequence sequence : sequences) {
            int len = sequence.length();
            Node current = root;
            for (int i = 0; i < len; i++) {
                char c = sequence.charAt(i);
                Node node = current.childOf(c);
                if (node == null) {
                    node = maker.make(current, c);
                    current.add(node);
                }
                current = node;
            }
            // current == root only for an empty sequence, which must not mark the root.
            if (current != root)
                current.setExists(true);
        }
        return root;
    }

    /**
     * Computes the failure link of every node. Nodes are visited breadth-first
     * so that a parent's failure link is final before its children need it.
     * The root keeps a {@code null} link; every other node ends up with a
     * non-null one.
     */
    private static void setFailNode(final Node root) {
        root.setFail(null);
        Queue<Node> queue = new LinkedList<Node>();
        queue.add(root);
        while (!queue.isEmpty()) {
            Node parent = queue.poll();
            for (Node child : parent.children()) {
                // Identity comparison keeps this independent of how isRoot() is implemented.
                if (parent == root) {
                    child.setFail(root);
                } else {
                    Node target = null;
                    // Follow the parent's failure chain until some node can consume the child's character.
                    for (Node temp = parent.fail(); temp != null; temp = temp.fail()) {
                        Node node = temp.childOf(child.value());
                        if (node != null) {
                            target = node;
                            break;
                        }
                    }
                    child.setFail(target == null ? root : target);
                }
                queue.add(child);
            }
        }
    }

    /**
     * Performs one automaton transition: from {@code state}, consume {@code c}.
     * Failure links are followed until a node with an edge for {@code c} is
     * found; if even the root has none, the automaton restarts at the root.
     * The character is always consumed, never skipped.
     */
    private Node step(Node state, char c) {
        Node current = state;
        while (true) {
            Node next = current.childOf(c);
            if (next != null)
                return next;
            Node fallback = current.fail();
            if (fallback == null)
                return root; // only the root has no failure link
            current = fallback;
        }
    }

    /**
     * Returns the longest dictionary word ending at {@code state}: the state
     * itself when it is flagged, otherwise the first flagged node on its
     * failure chain, or {@code null} when no word ends here.
     */
    private static Node longestWordEndingAt(Node state) {
        for (Node n = state; n != null; n = n.fail()) {
            if (n.exists())
                return n;
        }
        return null;
    }

    /**
     * Tells whether {@code cs} contains at least one dictionary word.
     *
     * @param cs text to scan; {@code null} is treated like empty text
     * @return {@code true} as soon as any word is found, {@code false} otherwise
     */
    public boolean findAnyIn(CharSequence cs) {
        if (cs == null)
            return false;
        int len = cs.length();
        Node node = root;
        for (int i = 0; i < len; i++) {
            node = step(node, cs.charAt(i));
            if (longestWordEndingAt(node) != null)
                return true;
        }
        return false;
    }

    /**
     * Scans {@code cs} from left to right and reports dictionary words found
     * in it. After each reported word the scan restarts from the root at the
     * next character, so reported matches never overlap. When several words
     * end at the same position the longest one is reported.
     *
     * @param cs text to scan
     * @return matches in order of their end position; an empty list when
     *         {@code cs} is {@code null}, empty, or contains no word
     */
    public List<MatchInfo> search(CharSequence cs) {
        if (cs == null || cs.length() == 0)
            return Collections.emptyList();
        List<MatchInfo> result = new ArrayList<MatchInfo>();
        int len = cs.length();
        Node node = root;
        for (int i = 0; i < len; i++) {
            node = step(node, cs.charAt(i));
            Node hit = longestWordEndingAt(node);
            if (hit != null) {
                result.add(new MatchInfo(i, hit));
                node = root;
            }
        }
        return result;
    }

    /** Returns the dump of the underlying trie, as produced by the root node. */
    @Override
    public String toString() {
        return root.toString();
    }
}
定义一个保存查找结果的实体:
/**
 * One search hit: the matched word and the position in the scanned text at
 * which it starts.
 *
 * Created by zhaoyy on 2017/2/7.
 */
public final class MatchInfo {
    private final int index;
    private final String word;

    /**
     * Creates a hit from explicit values.
     *
     * @param index start position of the word
     * @param word  the matched word
     */
    public MatchInfo(int index, String word) {
        this.index = index;
        this.word = word;
    }

    /**
     * Creates a hit from the trie node reached at the end of a match. The
     * word is rebuilt by walking the parent links up from {@code node},
     * leaving out any node that reports itself as root.
     *
     * @param index position of the last matched character
     * @param node  trie node of the last matched character
     */
    public MatchInfo(int index, Node node) {
        // Characters are collected leaf-to-root, hence the reversal afterwards.
        StringBuilder collected = new StringBuilder();
        for (Node cursor = node; cursor != null; cursor = cursor.parent()) {
            if (cursor.isRoot()) {
                continue;
            }
            collected.append(cursor.value());
        }
        String text = collected.reverse().toString();
        // Convert the end position into the start position of the word.
        this.index = index + 1 - text.length();
        this.word = text;
    }

    /** Returns the start position of the word. */
    public int getIndex() {
        return index;
    }

    /** Returns the matched word. */
    public String getWord() {
        return word;
    }

    /** Formats the hit as {@code index:word}. */
    @Override
    public String toString() {
        return index + ":" + word;
    }
}
第四步,调用Demo:
/**
 * Demo: compiles a small dictionary, prints the resulting trie, then prints
 * the words found in a sample sentence.
 */
public static void main(String[] args) {
    WordTable dictionary = WordTable.compile(
            Arrays.asList("say", "her", "he", "she", "shr", "alone"));
    // Dump of the trie, including the failure link of each node.
    System.out.println(dictionary);
    String sample = "1shesaynothingabouthislivinghimalone";
    List<MatchInfo> hits = dictionary.search(sample);
    System.out.println(hits);
}
以下是输出结果:
< exists="false" parent="null" fail="null">
	<s exists="false" parent=" " fail=" ">
		<a exists="false" parent="s" fail="a">
			<y exists="true" parent="a" fail=" ">
			</y>
		</a>
		<h exists="false" parent="s" fail="h">
			<e exists="true" parent="h" fail="e">
			</e>
			<r exists="true" parent="h" fail=" ">
			</r>
		</h>
	</s>
	<h exists="false" parent=" " fail=" ">
		<e exists="true" parent="h" fail=" ">
			<r exists="true" parent="e" fail=" ">
			</r>
		</e>
	</h>
	<a exists="false" parent=" " fail=" ">
		<l exists="false" parent="a" fail=" ">
			<o exists="false" parent="l" fail=" ">
				<n exists="false" parent="o" fail=" ">
					<e exists="true" parent="n" fail=" ">
					</e>
				</n>
			</o>
		</l>
	</a>
</ >
[1:she, 4:say, 31:alone]
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
# java实现全文检索
# java
# 全文检索
# 用java实现全文检索
# 详解Java中AC自动机的原理与实现
# java编程之AC自动机工作原理与实现代码
# Java数据结构之AC自动机算法的实现
# 两种
# 这两种
# 第二步
# 第三步
# 大家多多
# 第四步
# abstract
# AbstractNode
# static
# final
# implements
# private
# 全是可
# ASCII
# false
# MapNode
# AsciiNode
# hash
# append
# builder
相关栏目:
【
网站优化151355 】
【
网络推广146373 】
【
网络技术251813 】
【
AI营销90571 】
相关推荐:
如何快速搭建二级域名独立网站?
品牌网站制作公司有哪些,买正品品牌一般去哪个网站买?
学生网站制作软件,一个12岁的学生写小说,应该去什么样的网站?
微信推文制作网站有哪些,怎么做微信推文,急?
东莞专业网站制作公司有哪些,东莞招聘网站哪个好?
如何用美橙互联一键搭建多站合一网站?
Android自定义listview布局实现上拉加载下拉刷新功能
Laravel怎么创建控制器Controller_Laravel路由绑定与控制器逻辑编写【指南】
利用vue写todolist单页应用
Laravel Octane如何提升性能_使用Laravel Octane加速你的应用
Android自定义控件实现温度旋转按钮效果
laravel怎么为应用开启和关闭维护模式_laravel应用维护模式开启与关闭方法
Windows10如何更改计算机工作组_Win10系统属性修改Workgroup
Laravel任务队列怎么用_Laravel Queues异步处理任务提升应用性能
Laravel怎么使用Blade模板引擎_Laravel模板继承与Component组件复用【手册】
中国移动官方网站首页入口 中国移动官网网页登录
Laravel Debugbar怎么安装_Laravel调试工具栏配置指南
Laravel如何实现密码重置功能_Laravel密码找回与重置流程
Laravel如何使用Guzzle调用外部接口_Laravel发起HTTP请求与JSON数据解析【详解】
深圳防火门网站制作公司,深圳中天明防火门怎么编码?
ChatGPT怎么生成Excel公式_ChatGPT公式生成方法【指南】
Windows11怎样设置电源计划_Windows11电源计划调整攻略【指南】
如何快速辨别茅台真假?关键步骤解析
HTML5打空格有哪些误区_新手常犯的空格使用错误【技巧】
Laravel如何处理CORS跨域请求?(配置示例)
使用豆包 AI 辅助进行简单网页 HTML 结构设计
Laravel如何处理文件上传_Laravel Storage门面实现文件存储与管理
Laravel怎么连接多个数据库_Laravel多数据库连接配置
打造顶配客厅影院,这份100寸电视推荐名单请查收
香港服务器网站卡顿?如何解决网络延迟与负载问题?
网站制作报价单模板图片,小松挖机官方网站报价?
油猴 教程,油猴搜脚本为什么会网页无法显示?
如何注册花生壳免费域名并搭建个人网站?
如何用IIS7快速搭建并优化网站站点?
iOS验证手机号的正则表达式
JS中使用new Date(str)创建时间对象不兼容firefox和ie的解决方法(两种)
Android实现代码画虚线边框背景效果
Laravel如何实现数据导出到PDF_Laravel使用snappy生成网页快照PDF【方案】
如何自定义建站之星网站的导航菜单样式?
如何在万网ECS上快速搭建专属网站?
canvas 画布在主流浏览器中的尺寸限制详细介绍
香港服务器租用每月最低只需15元?
,南京靠谱的征婚网站?
如何在Windows环境下新建FTP站点并设置权限?
Microsoft Edge如何解决网页加载问题 Edge浏览器加载问题修复
打开php文件提示内存不足_怎么调整php内存限制【解决方案】
Claude怎样写结构化提示词_Claude结构化提示词写法【教程】
Laravel定时任务怎么设置_Laravel Crontab调度器配置
Laravel Asset编译怎么配置_Laravel Vite前端构建工具使用
Win11应用商店下载慢怎么办 Win11更改DNS提速下载【修复】

