diff --git a/.idea/compiler.xml b/.idea/compiler.xml
index a50c87d..0e19055 100644
--- a/.idea/compiler.xml
+++ b/.idea/compiler.xml
@@ -16,6 +16,7 @@
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
index d6c99ac..50cf68e 100644
--- a/.idea/encodings.xml
+++ b/.idea/encodings.xml
@@ -8,5 +8,7 @@
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_29.xml b/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_29.xml
deleted file mode 100644
index 5532a0b..0000000
--- a/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_29.xml
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 273b71e..20293e3 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -7,7 +7,10 @@
-
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
index 921be69..6395727 100644
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -5,6 +5,7 @@
+
\ No newline at end of file
diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java
index dc744bf..c3f0919 100644
--- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java
+++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java
@@ -7,16 +7,13 @@ import com.alibaba.fastjson.serializer.SerializerFeature;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.config.ESConstants;
-import com.bfd.mf.job.domain.entity.Subject;
import com.bfd.mf.job.domain.entity.Task;
import com.bfd.mf.job.domain.repository.SubjectRepository;
import com.bfd.mf.job.domain.repository.TaskRepository;
import com.bfd.mf.job.download.DownLoadFile;
-import com.bfd.mf.job.service.WriterTXTService;
import com.bfd.mf.job.util.*;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.RateLimiter;
-import kafka.utils.Json;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.assertj.core.util.Lists;
import org.elasticsearch.index.query.*;
@@ -26,15 +23,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
-import java.math.BigDecimal;
import java.math.BigInteger;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
import java.sql.Timestamp;
import java.util.*;
import java.util.concurrent.BlockingQueue;
diff --git a/cl_search_api/cl_search_api.iml b/cl_search_api/cl_search_api.iml
index dcb9b34..952369f 100644
--- a/cl_search_api/cl_search_api.iml
+++ b/cl_search_api/cl_search_api.iml
@@ -162,7 +162,8 @@
-
+
+
diff --git a/cl_search_api/pom.xml b/cl_search_api/pom.xml
index fad6989..f0ddb5c 100644
--- a/cl_search_api/pom.xml
+++ b/cl_search_api/pom.xml
@@ -13,7 +13,7 @@
cl_search_api
Search V3.2 API
cl_search_api
- 3.2-SNAPSHOT
+ 3.2.4-SNAPSHOT
@@ -138,7 +138,7 @@
mysql
mysql-connector-java
- 5.1.29
+ 8.0.30
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
index 5647efe..6e6c725 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
@@ -807,7 +807,14 @@ public class ESConstant {
ESConstant.VIDEOTIME, // 视频的时长
ESConstant.OCRTEXT, // 图像识别结果
- ESConstant.ASRTEXT // 语音识别结果
+ ESConstant.ASRTEXT, // 语音识别结果
+
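+ // Fields used to tell which kind of interaction a comment-type record is: comment / repost / like
+ // pageType = socialComment               -> comment
+ // pageType = socialFollow, userType = 0  -> repost or like (0/1 mapping not stated here; TODO confirm)
+ // pageType = socialFollow, userType = 1  -> repost or like (0/1 mapping not stated here; TODO confirm)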
+ ESConstant.PAGETYPE,
+ ESConstant.USER_TYPE
);
// 总体分析要用的字段
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java
index 4b266cc..25d5d5d 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java
@@ -1,518 +1,518 @@
-package com.bfd.mf.common.util.utility;
-
-import org.apache.commons.collections.CollectionUtils;
-
-import java.lang.reflect.Array;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
-
-public class TagWordUtils {
- private String tagBegin;
- private String tagEnd;
- Branch frontbegin = null;
- Set keyWords = new HashSet<>();
-
- public Set getKeyWords() {
- return keyWords;
- }
-
- public String[] getKeyWordsArray() {
- if (CollectionUtils.isNotEmpty(keyWords)) {
- String[] keyWordsArray = new String[keyWords.size()];
- return keyWords.toArray(keyWordsArray);
- }
- return null;
- }
-
- public TagWordUtils(String begin, String end) {
- this.tagBegin = begin;
- this.tagEnd = end;
- }
-
- public TagWordUtils addKeyWords(String[] keyWord) {
- if (keyWord.length > 0) {
- for (int i = 0; i < keyWord.length; i++) {
- this.keyWords.add(keyWord[i].trim());
- }
- }
- return this;
- }
-
- // 是否发现词
- boolean findWord = false;
-
-
- /**
- * Eric Added 2017-05-15 19:28:43
- * If keep break lines \n in content
- *
- * @param content
- * @param keepBr
- * @return
- */
- public String getTagContentWithBr(String content, boolean keepBr) {
- if (content == null || content.trim().length() == 0
- || keyWords.size() == 0) {
- return content;
- }
- // 对原始高亮的信息,进行去掉标签的操作;并且进行去掉 \n \r 以及特殊字符
- content = content.replaceAll(" ", "").replaceAll(" ", "").replaceAll("\t", "").replaceAll(" ", "")
- .replaceAll(" ", "");
- if (!keepBr) {
- content = content.replaceAll("\r", "").replaceAll("\n", "");
- }
- this.frontbegin = new MakeLibrary().getStringTree(this.keyWords);
- if (frontbegin == null) {
- return content;
- }
- char[] chars = content.toCharArray();
- // 正文
- StringBuilder sb = new StringBuilder();
-
- WoodInterface head = this.frontbegin;
- int start = 0;
- int end = 1;
- int index = 0;
- boolean isBack = false;
- int length = chars.length;
- // 此处是正向最大匹配
- for (int i = 0; i < length; i++) {
- index++;
- head = head.get(Character.toLowerCase(chars[i]));
- if (head == null) {
- if (isBack) {
- sb.append(tagBegin).append(chars, start, end).append(tagEnd);
- start = start + end;
- i = start - 1;
- isBack = false;
- } else {
- sb.append(chars, start, end);
- i = start;
- start++;
- }
- head = this.frontbegin;
- index = 0;
- end = 1;
- continue;
- }
- switch (head.getStatus()) {
- case 1:
- break;
- case 2:
- end = index;
- isBack = true;
- break;
- case 3:
- sb.append(tagBegin).append(chars, start, index).append(tagEnd);
- start = start + index;
- index = 0;
- end = 1;
- isBack = false;
- head = this.frontbegin;
- break;
- }
- }
-
- return sb.toString();
- }
-
- /**
- * 外媒格式处理
- *
- * @param content
- * @return
- */
- public String getForeignTagContent(String content) {
- if (content == null || content.trim().length() == 0
- || keyWords.size() == 0) {
- return content;
- }
- this.frontbegin = new MakeLibrary().getStringTree(this.keyWords);
- if (frontbegin == null) {
- return content;
- }
- char[] chars = content.toCharArray();
- // 正文
- StringBuilder sb = new StringBuilder();
-
- WoodInterface head = this.frontbegin;
- int start = 0;
- int end = 1;
- int index = 0;
- boolean isBack = false;
- int length = chars.length;
- // 此处是正向最大匹配
- for (int i = 0; i < length; i++) {
- index++;
- head = head.get(Character.toLowerCase(chars[i]));
- if (head == null) {
- if (isBack) {
- sb.append(tagBegin).append(chars, start, end).append(tagEnd);
- start = start + end;
- i = start - 1;
- isBack = false;
- } else {
- sb.append(chars, start, end);
- i = start;
- start++;
- }
- head = this.frontbegin;
- index = 0;
- end = 1;
- continue;
- }
- switch (head.getStatus()) {
- case 1:
- break;
- case 2:
- end = index;
- isBack = true;
- break;
- case 3:
- sb.append(tagBegin).append(chars, start, index).append(tagEnd);
- start = start + index;
- index = 0;
- end = 1;
- isBack = false;
- head = this.frontbegin;
- break;
- }
- }
-
- return sb.toString();
- }
-
- /**
- * Get content without break lines
- *
- * @param content
- * @return
- */
- public String getTagContent(String content) {
- return getTagContentWithBr(content, false);
- }
-
-
-// public static void main(String[] args) {
-// String[] keyWords = {"智能POS"};
-// for (int i = 0; i < 1; i++) {
-// String str = new TagWordUtils("", "").addKeyWords(keyWords)
-// .getTagContent("智能pos真的好");
-// System.out.println(str);
+//package com.bfd.mf.common.util.utility;
+//
+//import org.apache.commons.collections.CollectionUtils;
+//
+//import java.lang.reflect.Array;
+//import java.util.Arrays;
+//import java.util.HashSet;
+//import java.util.Iterator;
+//import java.util.Set;
+//
+//public class TagWordUtils {
+// private String tagBegin;
+// private String tagEnd;
+// Branch frontbegin = null;
+// Set keyWords = new HashSet<>();
+//
+// public Set getKeyWords() {
+// return keyWords;
+// }
+//
+// public String[] getKeyWordsArray() {
+// if (CollectionUtils.isNotEmpty(keyWords)) {
+// String[] keyWordsArray = new String[keyWords.size()];
+// return keyWords.toArray(keyWordsArray);
+// }
+// return null;
+// }
+//
+// public TagWordUtils(String begin, String end) {
+// this.tagBegin = begin;
+// this.tagEnd = end;
+// }
+//
+// public TagWordUtils addKeyWords(String[] keyWord) {
+// if (keyWord.length > 0) {
+// for (int i = 0; i < keyWord.length; i++) {
+// this.keyWords.add(keyWord[i].trim());
+// }
+// }
+// return this;
+// }
+//
+// // 是否发现词
+// boolean findWord = false;
+//
+//
+// /**
+// * Eric Added 2017-05-15 19:28:43
+// * If keep break lines \n in content
+// *
+// * @param content
+// * @param keepBr
+// * @return
+// */
+// public String getTagContentWithBr(String content, boolean keepBr) {
+// if (content == null || content.trim().length() == 0
+// || keyWords.size() == 0) {
+// return content;
+// }
+// // 对原始高亮的信息,进行去掉标签的操作;并且进行去掉 \n \r 以及特殊字符
+// content = content.replaceAll(" ", "").replaceAll(" ", "").replaceAll("\t", "").replaceAll(" ", "")
+// .replaceAll(" ", "");
+// if (!keepBr) {
+// content = content.replaceAll("\r", "").replaceAll("\n", "");
+// }
+// this.frontbegin = new MakeLibrary().getStringTree(this.keyWords);
+// if (frontbegin == null) {
+// return content;
+// }
+// char[] chars = content.toCharArray();
+// // 正文
+// StringBuilder sb = new StringBuilder();
+//
+// WoodInterface head = this.frontbegin;
+// int start = 0;
+// int end = 1;
+// int index = 0;
+// boolean isBack = false;
+// int length = chars.length;
+// // 此处是正向最大匹配
+// for (int i = 0; i < length; i++) {
+// index++;
+// head = head.get(Character.toLowerCase(chars[i]));
+// if (head == null) {
+// if (isBack) {
+// sb.append(tagBegin).append(chars, start, end).append(tagEnd);
+// start = start + end;
+// i = start - 1;
+// isBack = false;
+// } else {
+// sb.append(chars, start, end);
+// i = start;
+// start++;
+// }
+// head = this.frontbegin;
+// index = 0;
+// end = 1;
+// continue;
+// }
+// switch (head.getStatus()) {
+// case 1:
+// break;
+// case 2:
+// end = index;
+// isBack = true;
+// break;
+// case 3:
+// sb.append(tagBegin).append(chars, start, index).append(tagEnd);
+// start = start + index;
+// index = 0;
+// end = 1;
+// isBack = false;
+// head = this.frontbegin;
+// break;
+// }
+// }
+//
+// return sb.toString();
+// }
+//
+// /**
+// * 外媒格式处理
+// *
+// * @param content
+// * @return
+// */
+// public String getForeignTagContent(String content) {
+// if (content == null || content.trim().length() == 0
+// || keyWords.size() == 0) {
+// return content;
+// }
+// this.frontbegin = new MakeLibrary().getStringTree(this.keyWords);
+// if (frontbegin == null) {
+// return content;
+// }
+// char[] chars = content.toCharArray();
+// // 正文
+// StringBuilder sb = new StringBuilder();
+//
+// WoodInterface head = this.frontbegin;
+// int start = 0;
+// int end = 1;
+// int index = 0;
+// boolean isBack = false;
+// int length = chars.length;
+// // 此处是正向最大匹配
+// for (int i = 0; i < length; i++) {
+// index++;
+// head = head.get(Character.toLowerCase(chars[i]));
+// if (head == null) {
+// if (isBack) {
+// sb.append(tagBegin).append(chars, start, end).append(tagEnd);
+// start = start + end;
+// i = start - 1;
+// isBack = false;
+// } else {
+// sb.append(chars, start, end);
+// i = start;
+// start++;
+// }
+// head = this.frontbegin;
+// index = 0;
+// end = 1;
+// continue;
+// }
+// switch (head.getStatus()) {
+// case 1:
+// break;
+// case 2:
+// end = index;
+// isBack = true;
+// break;
+// case 3:
+// sb.append(tagBegin).append(chars, start, index).append(tagEnd);
+// start = start + index;
+// index = 0;
+// end = 1;
+// isBack = false;
+// head = this.frontbegin;
+// break;
+// }
+// }
+//
+// return sb.toString();
+// }
+//
+// /**
+// * Get content without break lines
+// *
+// * @param content
+// * @return
+// */
+// public String getTagContent(String content) {
+// return getTagContentWithBr(content, false);
+// }
+//
+//
+//// public static void main(String[] args) {
+//// String[] keyWords = {"智能POS"};
+//// for (int i = 0; i < 1; i++) {
+//// String str = new TagWordUtils("", "").addKeyWords(keyWords)
+//// .getTagContent("智能pos真的好");
+//// System.out.println(str);
+//// }
+//// }
+//
+//}
+//
+//class MakeLibrary {
+//
+// public MakeLibrary() {
+// }
+//
+// // 是否有下一个
+// private static boolean hasNext = true;
+// // 是否是一个词
+// private static boolean isWords = true;
+//
+// Iterator it = null;
+//
+// public Branch getStringTree(Set keyWords) {
+// it = keyWords.iterator();
+// Branch head = new Branch('h', 0, 0);
+// Branch branch = head;
+//
+// while (it.hasNext()) {
+// /**
+// * 对于英文字母全部都全部转换成小写
+// */
+// char[] chars = it.next().toLowerCase().toCharArray();
+// for (int i = 0; i < chars.length; i++) {
+// if (chars.length == (i + 1)) {
+// isWords = true;
+// hasNext = false;
+// } else {
+// isWords = false;
+// hasNext = true;
+// }
+// int status = 1;
+// if (isWords && hasNext) {
+// status = 2;
+// }
+// if (!isWords && hasNext) {
+// status = 1;
+// }
+// if (isWords && !hasNext) {
+// status = 3;
+// }
+// branch.add(new Branch(chars[i], status, 0));
+// branch = (Branch) branch.get(chars[i]);
+// }
+// branch = head;
+// }
+// return head;
+// }
+//}
+//
+//interface WoodInterface {
+// public WoodInterface add(WoodInterface branch);
+//
+// public WoodInterface get(char c);
+//
+// public boolean contains(char c);
+//
+// public int compareTo(char c);
+//
+// public boolean equals(char c);
+//
+// public byte getStatus();
+//
+// public char getC();
+//
+// public void setStatus(int status);
+//
+// public byte getNature();
+//
+// public void setNature(byte nature);
+//}
+//
+//
+//class Branch implements WoodInterface {
+// /**
+// * status 此字的状态1,继续 2,是个词语但是还可以继续 ,3确定
+// * nature 词语性质
+// * 0.未知 . 1是姓 . 2 是职位名称 3 是数量级的词 . 4 是数字词语 5 是标点
+// */
+// WoodInterface[] branches = new WoodInterface[0];
+// private char c;
+// // 状态
+// private byte status = 1;
+// // 索引
+// private short index = -1;
+// // 词性
+// private byte nature = 0;
+// // 单独查找出来的对象
+// WoodInterface branch = null;
+//
+// public WoodInterface add(WoodInterface branch) {
+// if ((this.branch = this.get(branch.getC())) != null) {
+// switch (branch.getStatus()) {
+// case 1:
+// if (this.branch.getStatus() == 2) {
+// this.branch.setStatus(2);
+// }
+// if (this.branch.getStatus() == 3) {
+// this.branch.setStatus(2);
+// }
+// break;
+// case 2:
+// this.branch.setStatus(2);
+// case 3:
+// if (this.branch.getStatus() == 2) {
+// this.branch.setStatus(2);
+// }
+// if (this.branch.getStatus() == 1) {
+// this.branch.setStatus(2);
+// }
+// }
+// this.branch.setNature(branch.getNature());
+// return this.branch;
+// }
+// index++;
+// if ((index + 1) > branches.length) {
+// branches = Arrays.copyOf(branches, index + 1);
+// }
+// branches[index] = branch;
+// AnsjArrays.sort(branches);
+// return branch;
+// }
+//
+// public Branch(char c, int status, int nature) {
+// this.c = c;
+// this.status = (byte) status;
+// this.nature = (byte) nature;
+// }
+//
+// int i = 0;
+//
+// public WoodInterface get(char c) {
+// int i = AnsjArrays.binarySearch(branches, c);
+// if (i > -1) {
+// return branches[i];
+// }
+// return null;
+// }
+//
+// public boolean contains(char c) {
+// if (AnsjArrays.binarySearch(branches, c) > -1) {
+// return true;
+// } else {
+// return false;
+// }
+// }
+//
+// public int compareTo(char c) {
+// if (this.c > c) {
+// return 1;
+// } else if (this.c < c) {
+// return -1;
+// } else {
+// return 0;
+// }
+// }
+//
+// public boolean equals(char c) {
+// if (this.c == c) {
+// return true;
+// } else {
+// return false;
+// }
+// }
+//
+// @Override
+// public int hashCode() {
+// // TODO Auto-generated method stub
+// return c;
+// }
+//
+// public byte getStatus() {
+// return status;
+// }
+//
+// public void setStatus(int status) {
+// this.status = (byte) status;
+// }
+//
+// public char getC() {
+// return this.c;
+// }
+//
+// public byte getNature() {
+// return nature;
+// }
+//
+// public void setNature(byte nature) {
+// this.nature = nature;
+// }
+//
+//}
+//
+//class AnsjArrays {
+// private static final int INSERTIONSORT_THRESHOLD = 7;
+//
+// /**
+// * 二分法查找.摘抄了jdk的东西..只不过把他的自动装箱功能给去掉了
+// */
+// public static int binarySearch(WoodInterface[] branches, char c) {
+// int high = branches.length - 1;
+// if (branches.length < 1) {
+// return high;
+// }
+// int low = 0;
+// while (low <= high) {
+// int mid = (low + high) >>> 1;
+// int cmp = branches[mid].compareTo(c);
+//
+// if (cmp < 0) {
+// low = mid + 1;
+// } else if (cmp > 0) {
+// high = mid - 1;
+// } else {
+// return mid; // key found
+// }
+// }
+// return -1; // key not found.
+// }
+//
+// public static void sort(WoodInterface[] a) {
+// WoodInterface[] aux = (WoodInterface[]) a.clone();
+// mergeSort(aux, a, 0, a.length, 0);
+// }
+//
+// public static void sort(WoodInterface[] a, int fromIndex, int toIndex) {
+// rangeCheck(a.length, fromIndex, toIndex);
+// WoodInterface[] aux = copyOfRange(a, fromIndex, toIndex);
+// mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
+// }
+//
+// private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
+// if (fromIndex > toIndex) {
+// throw new IllegalArgumentException("fromIndex(" + fromIndex
+// + ") > toIndex(" + toIndex + ")");
+// }
+// if (fromIndex < 0) {
+// throw new ArrayIndexOutOfBoundsException(fromIndex);
+// }
+// if (toIndex > arrayLen) {
+// throw new ArrayIndexOutOfBoundsException(toIndex);
+// }
+// }
+//
+// private static void mergeSort(WoodInterface[] src, WoodInterface[] dest, int low,
+// int high, int off) {
+// int length = high - low;
+//
+// // Insertion sort on smallest arrays
+// if (length < INSERTIONSORT_THRESHOLD) {
+// for (int i = low; i < high; i++)
+// for (int j = i; j > low
+// && (dest[j - 1]).compareTo(dest[j].getC()) > 0; j--)
+// swap(dest, j, j - 1);
+// return;
+// }
+//
+// // Recursively sort halves of dest into src
+// int destLow = low;
+// int destHigh = high;
+// low += off;
+// high += off;
+// int mid = (low + high) >>> 1;
+// mergeSort(dest, src, low, mid, -off);
+// mergeSort(dest, src, mid, high, -off);
+//
+// // If list is already sorted, just copy from src to dest. This is an
+// // optimization that results in faster sorts for nearly ordered lists.
+// if (src[mid - 1].compareTo(src[mid].getC()) <= 0) {
+// System.arraycopy(src, low, dest, destLow, length);
+// return;
+// }
+//
+// // Merge sorted halves (now in src) into dest
+// for (int i = destLow, p = low, q = mid; i < destHigh; i++) {
+// if (q >= high || p < mid
+// && src[p].compareTo(src[q].getC()) <= 0) {
+// dest[i] = src[p++];
+// } else {
+// dest[i] = src[q++];
+// }
+// }
+// }
+//
+// /**
+// * Swaps x[a] with x[b].
+// */
+// private static void swap(WoodInterface[] x, int a, int b) {
+// WoodInterface t = x[a];
+// x[a] = x[b];
+// x[b] = t;
+// }
+//
+// public static T[] copyOfRange(T[] original, int from, int to) {
+// return copyOfRange(original, from, to, (Class) original.getClass());
+// }
+//
+// public static T[] copyOfRange(U[] original, int from, int to,
+// Class extends T[]> newType) {
+// int newLength = to - from;
+// if (newLength < 0) {
+// throw new IllegalArgumentException(from + " > " + to);
// }
+// T[] copy = ((Object) newType == (Object) Object[].class) ? (T[]) new Object[newLength]
+// : (T[]) Array
+// .newInstance(newType.getComponentType(), newLength);
+// System.arraycopy(original, from, copy, 0, Math.min(original.length
+// - from, newLength));
+// return copy;
// }
-
-}
-
-class MakeLibrary {
-
- public MakeLibrary() {
- }
-
- // 是否有下一个
- private static boolean hasNext = true;
- // 是否是一个词
- private static boolean isWords = true;
-
- Iterator it = null;
-
- public Branch getStringTree(Set keyWords) {
- it = keyWords.iterator();
- Branch head = new Branch('h', 0, 0);
- Branch branch = head;
-
- while (it.hasNext()) {
- /**
- * 对于英文字母全部都全部转换成小写
- */
- char[] chars = it.next().toLowerCase().toCharArray();
- for (int i = 0; i < chars.length; i++) {
- if (chars.length == (i + 1)) {
- isWords = true;
- hasNext = false;
- } else {
- isWords = false;
- hasNext = true;
- }
- int status = 1;
- if (isWords && hasNext) {
- status = 2;
- }
- if (!isWords && hasNext) {
- status = 1;
- }
- if (isWords && !hasNext) {
- status = 3;
- }
- branch.add(new Branch(chars[i], status, 0));
- branch = (Branch) branch.get(chars[i]);
- }
- branch = head;
- }
- return head;
- }
-}
-
-interface WoodInterface {
- public WoodInterface add(WoodInterface branch);
-
- public WoodInterface get(char c);
-
- public boolean contains(char c);
-
- public int compareTo(char c);
-
- public boolean equals(char c);
-
- public byte getStatus();
-
- public char getC();
-
- public void setStatus(int status);
-
- public byte getNature();
-
- public void setNature(byte nature);
-}
-
-
-class Branch implements WoodInterface {
- /**
- * status 此字的状态1,继续 2,是个词语但是还可以继续 ,3确定
- * nature 词语性质
- * 0.未知 . 1是姓 . 2 是职位名称 3 是数量级的词 . 4 是数字词语 5 是标点
- */
- WoodInterface[] branches = new WoodInterface[0];
- private char c;
- // 状态
- private byte status = 1;
- // 索引
- private short index = -1;
- // 词性
- private byte nature = 0;
- // 单独查找出来的对象
- WoodInterface branch = null;
-
- public WoodInterface add(WoodInterface branch) {
- if ((this.branch = this.get(branch.getC())) != null) {
- switch (branch.getStatus()) {
- case 1:
- if (this.branch.getStatus() == 2) {
- this.branch.setStatus(2);
- }
- if (this.branch.getStatus() == 3) {
- this.branch.setStatus(2);
- }
- break;
- case 2:
- this.branch.setStatus(2);
- case 3:
- if (this.branch.getStatus() == 2) {
- this.branch.setStatus(2);
- }
- if (this.branch.getStatus() == 1) {
- this.branch.setStatus(2);
- }
- }
- this.branch.setNature(branch.getNature());
- return this.branch;
- }
- index++;
- if ((index + 1) > branches.length) {
- branches = Arrays.copyOf(branches, index + 1);
- }
- branches[index] = branch;
- AnsjArrays.sort(branches);
- return branch;
- }
-
- public Branch(char c, int status, int nature) {
- this.c = c;
- this.status = (byte) status;
- this.nature = (byte) nature;
- }
-
- int i = 0;
-
- public WoodInterface get(char c) {
- int i = AnsjArrays.binarySearch(branches, c);
- if (i > -1) {
- return branches[i];
- }
- return null;
- }
-
- public boolean contains(char c) {
- if (AnsjArrays.binarySearch(branches, c) > -1) {
- return true;
- } else {
- return false;
- }
- }
-
- public int compareTo(char c) {
- if (this.c > c) {
- return 1;
- } else if (this.c < c) {
- return -1;
- } else {
- return 0;
- }
- }
-
- public boolean equals(char c) {
- if (this.c == c) {
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public int hashCode() {
- // TODO Auto-generated method stub
- return c;
- }
-
- public byte getStatus() {
- return status;
- }
-
- public void setStatus(int status) {
- this.status = (byte) status;
- }
-
- public char getC() {
- return this.c;
- }
-
- public byte getNature() {
- return nature;
- }
-
- public void setNature(byte nature) {
- this.nature = nature;
- }
-
-}
-
-class AnsjArrays {
- private static final int INSERTIONSORT_THRESHOLD = 7;
-
- /**
- * 二分法查找.摘抄了jdk的东西..只不过把他的自动装箱功能给去掉了
- */
- public static int binarySearch(WoodInterface[] branches, char c) {
- int high = branches.length - 1;
- if (branches.length < 1) {
- return high;
- }
- int low = 0;
- while (low <= high) {
- int mid = (low + high) >>> 1;
- int cmp = branches[mid].compareTo(c);
-
- if (cmp < 0) {
- low = mid + 1;
- } else if (cmp > 0) {
- high = mid - 1;
- } else {
- return mid; // key found
- }
- }
- return -1; // key not found.
- }
-
- public static void sort(WoodInterface[] a) {
- WoodInterface[] aux = (WoodInterface[]) a.clone();
- mergeSort(aux, a, 0, a.length, 0);
- }
-
- public static void sort(WoodInterface[] a, int fromIndex, int toIndex) {
- rangeCheck(a.length, fromIndex, toIndex);
- WoodInterface[] aux = copyOfRange(a, fromIndex, toIndex);
- mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
- }
-
- private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
- if (fromIndex > toIndex) {
- throw new IllegalArgumentException("fromIndex(" + fromIndex
- + ") > toIndex(" + toIndex + ")");
- }
- if (fromIndex < 0) {
- throw new ArrayIndexOutOfBoundsException(fromIndex);
- }
- if (toIndex > arrayLen) {
- throw new ArrayIndexOutOfBoundsException(toIndex);
- }
- }
-
- private static void mergeSort(WoodInterface[] src, WoodInterface[] dest, int low,
- int high, int off) {
- int length = high - low;
-
- // Insertion sort on smallest arrays
- if (length < INSERTIONSORT_THRESHOLD) {
- for (int i = low; i < high; i++)
- for (int j = i; j > low
- && (dest[j - 1]).compareTo(dest[j].getC()) > 0; j--)
- swap(dest, j, j - 1);
- return;
- }
-
- // Recursively sort halves of dest into src
- int destLow = low;
- int destHigh = high;
- low += off;
- high += off;
- int mid = (low + high) >>> 1;
- mergeSort(dest, src, low, mid, -off);
- mergeSort(dest, src, mid, high, -off);
-
- // If list is already sorted, just copy from src to dest. This is an
- // optimization that results in faster sorts for nearly ordered lists.
- if (src[mid - 1].compareTo(src[mid].getC()) <= 0) {
- System.arraycopy(src, low, dest, destLow, length);
- return;
- }
-
- // Merge sorted halves (now in src) into dest
- for (int i = destLow, p = low, q = mid; i < destHigh; i++) {
- if (q >= high || p < mid
- && src[p].compareTo(src[q].getC()) <= 0) {
- dest[i] = src[p++];
- } else {
- dest[i] = src[q++];
- }
- }
- }
-
- /**
- * Swaps x[a] with x[b].
- */
- private static void swap(WoodInterface[] x, int a, int b) {
- WoodInterface t = x[a];
- x[a] = x[b];
- x[b] = t;
- }
-
- public static T[] copyOfRange(T[] original, int from, int to) {
- return copyOfRange(original, from, to, (Class) original.getClass());
- }
-
- public static T[] copyOfRange(U[] original, int from, int to,
- Class extends T[]> newType) {
- int newLength = to - from;
- if (newLength < 0) {
- throw new IllegalArgumentException(from + " > " + to);
- }
- T[] copy = ((Object) newType == (Object) Object[].class) ? (T[]) new Object[newLength]
- : (T[]) Array
- .newInstance(newType.getComponentType(), newLength);
- System.arraycopy(original, from, copy, 0, Math.min(original.length
- - from, newLength));
- return copy;
- }
-}
+//}
diff --git a/cl_search_api/src/main/resources/application.yml b/cl_search_api/src/main/resources/application.yml
index 88c0d47..58a1028 100644
--- a/cl_search_api/src/main/resources/application.yml
+++ b/cl_search_api/src/main/resources/application.yml
@@ -14,13 +14,13 @@ server:
spring:
datasource:
- driver-class-name: com.mysql.jdbc.Driver
-# username: root
-# password: Bfd123!@#
-# url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
+ driver-class-name: com.mysql.cj.jdbc.Driver
username: crawl
- password: D5HLOvk553DUNV62qJI=
- url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
+ password: crawl123
+ url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
+# username: root
+# password: bfd123
+# url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
hikari:
maximum-pool-size: 10
minimum-idle: 1
@@ -47,8 +47,8 @@ bfd.api.mf:
indexNamePre : cl_major_
es-mini:
- name: SQ_Mini
- address: 172.18.1.147:9313
+ name: SQ_Mini_2
+ address: 172.18.1.81:9301
upper: 2018-09-01
standby: cl_major_*
es-normal: