Add File

2025-09-04 14:09:27 +08:00
parent 53fd67c123
commit 6ac84bf47b
1 changed files with 102 additions and 0 deletions
--- a/src/main/java/org/dromara/easyai/naturalLanguage/Talk.java
+++ b/src/main/java/org/dromara/easyai/naturalLanguage/Talk.java
@@ -0,0 +1,102 @@
+package org.dromara.easyai.naturalLanguage;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author lidapeng
+ * @description 语句分类
+ * @date 4:14 下午 2020/2/23
+ */
+public class Talk {
+    private final List<WorldBody> allWorld;//所有词集合
+    private final WordTemple wordTemple;
+
+    public Talk(WordTemple wordTemple) {
+        this.wordTemple = wordTemple;
+        allWorld = wordTemple.getAllWorld();//所有词集合
+    }
+
+    public List<List<String>> getSplitWord(String sentence) {//单纯进行拆词
+        List<Sentence> sentences = splitSentence(sentence);
+        List<List<String>> words = new ArrayList<>();
+        for (Sentence sentence1 : sentences) {
+            words.add(sentence1.getKeyWords());
+        }
+        return words;
+    }
+
+    private List<Sentence> splitSentence(String sentence) {
+        String[] sens = sentence.replace(" ", "").split("，|。|？|！|；|、|：");
+        //拆词
+        List<Sentence> sentences = new ArrayList<>();
+        for (String mySentence : sens) {
+            List<Sentence> sentenceList = catchSentence(mySentence);
+            int key = 0;
+            int nub = 0;
+            for (int j = 0; j < sentenceList.size(); j++) {
+                Sentence sentence1 = sentenceList.get(j);
+                restructure(sentence1);
+                int size = sentence1.getKeyWords().size();
+                if (size > nub) {
+                    key = j;
+                    nub = size;
+                }
+            }
+            //System.out.println(sentenceList.get(key).getKeyWords());
+            sentences.add(sentenceList.get(key));
+        }
+        return sentences;
+    }
+
+    private List<Sentence> catchSentence(String sentence) {//把句子拆开
+        int len = sentence.length();
+        List<Sentence> sentences = new ArrayList<>();
+        if (len > 1) {
+            for (int j = 0; j < len - 1; j++) {
+                Sentence sentenceWords = new Sentence();
+                for (int i = j; i < len; i++) {
+                    String word = sentence.substring(j, i + 1);
+                    sentenceWords.setWord(word);
+                }
+                sentences.add(sentenceWords);
+            }
+        } else {
+            Sentence sentenceWords = new Sentence();
+            sentenceWords.setWord(sentence);
+            sentences.add(sentenceWords);
+        }
+        return sentences;
+    }
+
+    private void restructure(Sentence words) {//对句子里面的Word进行词频统计
+        List<WorldBody> listWord = allWorld;
+        List<Word> waitWorld = words.getWaitWords();
+        for (Word word : waitWorld) {
+            String myWord = word.getWord();
+            WorldBody body = getBody(myWord, listWord);
+            if (body == null) {//已经无法查找到对应的词汇了
+                word.setWordFrequency(1);
+                break;
+            }
+            listWord = body.getWorldBodies();//这个body报了一次空指针
+            word.setWordFrequency(body.getWordFrequency());
+        }
+        Tokenizer tokenizer = new Tokenizer(wordTemple);
+        tokenizer.radiation(words);
+
+    }
+
+    private WorldBody getBody(String word, List<WorldBody> worlds) {
+        //TODO 这里有个BUG 当myBody出现空的时候断词已经找不到了
+        WorldBody myBody = null;
+        for (WorldBody body : worlds) {
+            if (body.getWordName().hashCode() == word.hashCode() && body.getWordName().equals(word)) {
+                myBody = body;
+                break;
+            }
+        }
+        return myBody;
+    }
+}