做一个简单的中文字典树,稍比英文的复杂一些,英文无非26个树杈而且排序方便,用数组表示树杈即可
而中文相对复杂一些,一个中文2个字节一个char就够了,但是树杈个数不定,对树杈进行排序用数组来做也相对复杂,因此考虑用二叉搜索树代替数组来做子节点的存储结构
一个字典树节点链接一个二叉搜索树,二叉树上每个支上又挂载一个字典树节点
部分代码如下:
j.analysis;/*** 汉字字典树* * @author Administrator* */
public class Trie {// 子节点private BSTree children;// 该节点存储的数据private Character nodeValue;// 是否词尾 0非词尾 1词尾private int nodeState = 0;public Trie(Character value) {if (value == null)throw new IllegalArgumentException("参数异常不能为空");deValue = value;}/*** 匹配词段* * @param charArray* @return Hit*/public Hit match(char[] charArray, int begin, int length, Hit searchHit) {if (searchHit == null) {searchHit = new Hit();searchHit.setBegin(begin);} else {searchHit.setUnmatch();}searchHit.setEnd(begin);Character keyChar = new Character(charArray[begin]);Trie ds = null;BSTree segmentArray = this.children;if (segmentArray != null) {Trie keySegment = new Trie(keyChar);BSTree bs = null;if ((bs = segmentArray.findNode(segmentArray, keySegment)) != null)ds = bs.getValue();}if (ds != null) {if (length > 1) {// 字符未匹配完return ds.match(charArray, begin + 1, length - 1, searchHit);} else if (length == 1) {if (ds.nodeState == 1) {searchHit.setMatch();}if (ds.children != null) {searchHit.setPrefix();searchHit.setMatchedDictSegment(ds);}return searchHit;}}return searchHit;}/*** 添加*/public synchronized void fillSegment(char[] charArray, int begin, int length) {Character beginChar = charArray[begin];Trie trie = new Trie(beginChar);if (length == 1) {deState = 1;}BSTree bs = children;if (children == null) {children = new BSTree(trie);}BSTree node = children.findNode(children, trie);if (node == null) {children.addNode(children, trie);node = children.findNode(children, trie);}if (length > 1) {Value().fillSegment(charArray, begin + 1, length - 1);}}public BSTree getChildren() {return children;}public Character getNodeValue() {return nodeValue;}public int getNodeState() {return nodeState;}}
j.analysis;public class BSTree {private BSTree left;private BSTree right;private int freqs;private Trie value;public BSTree(Trie value){this.value = value;this.left = null;this.right = null;this.freqs = 1;}/*** 添加节点* @param src* @param value* @return*/public BSTree addNode(BSTree src,Trie value){if(src == null){src = new BSTree(value);}else if(NodeValue() > NodeValue()){src.left = addNode(src.left, value);}else if(NodeValue() < NodeValue()){src.right = addNode(src.right, value);}elsesrc.freqs += 1;balence();return src;}/*** 查找节点* @param src* @param value* @return*/public BSTree findNode(BSTree src,Trie value){if(src == null){return null;}BSTree node = null;NodeValue() < NodeValue()){node = findNode(src.left,value);}else NodeValue() > NodeValue()){node = findNode(src.right,value);}elsenode = src;return node;}public BSTree getLeft() {return left;}public BSTree getRight() {return right;}public int getFreqs() {return freqs;}public Trie getValue() {return value;}/*** 平衡方法,暂不考虑*/private void balence(){}
}
测试类
st;import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;j.analysis.Hit;
j.analysis.Trie;public class Main {//测试字典,已通过public static void main(String[] args) throws IOException {BufferedReader br = new BufferedReader(new FileReader(new File("C:\Users\Administrator\Desktop\")));String content = null;Trie trie = new Trie('0');while((content = br.readLine())!=null){content = new Bytes("GBK"),"UTF-8");String words = content.split("[t ]+")[0];trie.CharArray(), 0, words.length());}br.close();Hit searchHit = new Hit();searchHit = trie.match("阿猫".toCharArray(), 0, 2, searchHit);System.out.println(searchHit.isMatch());}
}
本文发布于:2024-01-29 15:07:12,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170651203316139.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |