From ea589f7e837a33d2408505b46495fc0218552ff3 Mon Sep 17 00:00:00 2001 From: "jing.du" Date: Wed, 22 Feb 2023 14:16:11 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=9928=E6=94=B9=E7=9A=84=E4=B8=80=E4=B8=AA?= =?UTF-8?q?=E7=89=88=E6=9C=AC=EF=BC=8C=E6=9F=A5=E8=AF=A2=E4=B8=BB=E8=B4=B4?= =?UTF-8?q?=E7=9A=84=E8=AF=84=E8=AE=BA=E6=97=B6=EF=BC=8C=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E4=BA=86=E4=B8=A4=E4=B8=AA=E8=BE=93=E5=87=BA=E5=AD=97=E6=AE=B5?= =?UTF-8?q?=EF=BC=9ApageType=20=E5=92=8C=20userType,=E7=94=A8=E6=9D=A5?= =?UTF-8?q?=E5=8C=BA=E5=88=86=20=E8=AF=84=E8=AE=BA=E3=80=81=E8=BD=AC?= =?UTF-8?q?=E5=8F=91=E3=80=81=E7=82=B9=E8=B5=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/compiler.xml | 1 + .idea/encodings.xml | 2 + .../Maven__mysql_mysql_connector_java_5_1_29.xml | 13 - .idea/misc.xml | 5 +- .idea/modules.xml | 1 + .../com/bfd/mf/job/service/query/QueryService.java | 10 - cl_search_api/cl_search_api.iml | 3 +- cl_search_api/pom.xml | 4 +- .../bfd/mf/common/util/constants/ESConstant.java | 9 +- .../bfd/mf/common/util/utility/TagWordUtils.java | 1032 ++++++++++---------- cl_search_api/src/main/resources/application.yml | 16 +- 11 files changed, 544 insertions(+), 552 deletions(-) delete mode 100644 .idea/libraries/Maven__mysql_mysql_connector_java_5_1_29.xml diff --git a/.idea/compiler.xml b/.idea/compiler.xml index a50c87d..0e19055 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -16,6 +16,7 @@ + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml index d6c99ac..50cf68e 100644 --- a/.idea/encodings.xml +++ b/.idea/encodings.xml @@ -8,5 +8,7 @@ + + \ No newline at end of file diff --git a/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_29.xml b/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_29.xml deleted file mode 100644 index 5532a0b..0000000 --- a/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_29.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 273b71e..20293e3 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -7,7 +7,10 @@ - + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index 921be69..6395727 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -5,6 +5,7 @@ + \ No newline at end of file diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java index dc744bf..c3f0919 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java @@ -7,16 +7,13 @@ import com.alibaba.fastjson.serializer.SerializerFeature; import com.bfd.crawler.utils.JsonUtils; import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.ESConstants; -import com.bfd.mf.job.domain.entity.Subject; import com.bfd.mf.job.domain.entity.Task; import com.bfd.mf.job.domain.repository.SubjectRepository; import com.bfd.mf.job.domain.repository.TaskRepository; import com.bfd.mf.job.download.DownLoadFile; -import com.bfd.mf.job.service.WriterTXTService; import com.bfd.mf.job.util.*; import com.google.common.collect.Maps; import com.google.common.util.concurrent.RateLimiter; -import kafka.utils.Json; import org.apache.commons.lang3.exception.ExceptionUtils; import org.assertj.core.util.Lists; import org.elasticsearch.index.query.*; @@ -26,15 +23,8 @@ import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import javax.annotation.PostConstruct; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; -import java.math.BigDecimal; import java.math.BigInteger; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLConnection; import java.sql.Timestamp; import java.util.*; import java.util.concurrent.BlockingQueue; diff --git a/cl_search_api/cl_search_api.iml b/cl_search_api/cl_search_api.iml index dcb9b34..952369f 100644 --- a/cl_search_api/cl_search_api.iml +++ b/cl_search_api/cl_search_api.iml @@ -162,7 +162,8 @@ - + + diff --git a/cl_search_api/pom.xml b/cl_search_api/pom.xml index fad6989..f0ddb5c 100644 --- a/cl_search_api/pom.xml +++ b/cl_search_api/pom.xml @@ -13,7 +13,7 @@ cl_search_api Search V3.2 API cl_search_api - 3.2-SNAPSHOT + 3.2.4-SNAPSHOT @@ -138,7 +138,7 @@ mysql mysql-connector-java - 5.1.29 + 8.0.30 diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java index 5647efe..6e6c725 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java @@ -807,7 +807,14 @@ public class ESConstant { ESConstant.VIDEOTIME, // 视频的时长 ESConstant.OCRTEXT, // 图像识别结果 - ESConstant.ASRTEXT // 语音识别结果 + ESConstant.ASRTEXT, // 语音识别结果 + + // 判断评论数据的类型,评论/转发/点赞 + // pageType = socialComment 评论 + // pageType = socialFollow userType = 0 + // pageType = socialFollow userType = 1 + ESConstant.PAGETYPE, + ESConstant.USER_TYPE ); // 总体分析要用的字段 diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java index 4b266cc..25d5d5d 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/TagWordUtils.java @@ -1,518 +1,518 @@ -package com.bfd.mf.common.util.utility; - -import org.apache.commons.collections.CollectionUtils; - -import java.lang.reflect.Array; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; - -public class TagWordUtils { - private String tagBegin; - private String tagEnd; - Branch frontbegin = null; - Set keyWords = new HashSet<>(); - - public Set getKeyWords() { - return keyWords; - } - - public String[] getKeyWordsArray() { - if (CollectionUtils.isNotEmpty(keyWords)) { - String[] keyWordsArray = new String[keyWords.size()]; - return keyWords.toArray(keyWordsArray); - } - return null; - } - - public TagWordUtils(String begin, String end) { - this.tagBegin = begin; - this.tagEnd = end; - } - - public TagWordUtils addKeyWords(String[] keyWord) { - if (keyWord.length > 0) { - for (int i = 0; i < keyWord.length; i++) { - this.keyWords.add(keyWord[i].trim()); - } - } - return this; - } - - // 是否发现词 - boolean findWord = false; - - - /** - * Eric Added 2017-05-15 19:28:43 - * If keep break lines \n in content - * - * @param content - * @param keepBr - * @return - */ - public String getTagContentWithBr(String content, boolean keepBr) { - if (content == null || content.trim().length() == 0 - || keyWords.size() == 0) { - return content; - } - // 对原始高亮的信息,进行去掉标签的操作;并且进行去掉 \n \r 以及特殊字符 - content = content.replaceAll(" ", "").replaceAll(" ", "").replaceAll("\t", "").replaceAll(" ", "") - .replaceAll("  ", ""); - if (!keepBr) { - content = content.replaceAll("\r", "").replaceAll("\n", ""); - } - this.frontbegin = new MakeLibrary().getStringTree(this.keyWords); - if (frontbegin == null) { - return content; - } - char[] chars = content.toCharArray(); - // 正文 - StringBuilder sb = new StringBuilder(); - - WoodInterface head = this.frontbegin; - int start = 0; - int end = 1; - int index = 0; - boolean isBack = false; - int length = chars.length; - // 此处是正向最大匹配 - for (int i = 0; i < length; i++) { - index++; - head = head.get(Character.toLowerCase(chars[i])); - if (head == null) { - if (isBack) { - sb.append(tagBegin).append(chars, start, end).append(tagEnd); - start = start + end; - i = start - 1; - isBack = false; - } else { - sb.append(chars, start, end); - i = start; - start++; - } - head = this.frontbegin; - index = 0; - end = 1; - continue; - } - switch (head.getStatus()) { - case 1: - break; - case 2: - end = index; - isBack = true; - break; - case 3: - sb.append(tagBegin).append(chars, start, index).append(tagEnd); - start = start + index; - index = 0; - end = 1; - isBack = false; - head = this.frontbegin; - break; - } - } - - return sb.toString(); - } - - /** - * 外媒格式处理 - * - * @param content - * @return - */ - public String getForeignTagContent(String content) { - if (content == null || content.trim().length() == 0 - || keyWords.size() == 0) { - return content; - } - this.frontbegin = new MakeLibrary().getStringTree(this.keyWords); - if (frontbegin == null) { - return content; - } - char[] chars = content.toCharArray(); - // 正文 - StringBuilder sb = new StringBuilder(); - - WoodInterface head = this.frontbegin; - int start = 0; - int end = 1; - int index = 0; - boolean isBack = false; - int length = chars.length; - // 此处是正向最大匹配 - for (int i = 0; i < length; i++) { - index++; - head = head.get(Character.toLowerCase(chars[i])); - if (head == null) { - if (isBack) { - sb.append(tagBegin).append(chars, start, end).append(tagEnd); - start = start + end; - i = start - 1; - isBack = false; - } else { - sb.append(chars, start, end); - i = start; - start++; - } - head = this.frontbegin; - index = 0; - end = 1; - continue; - } - switch (head.getStatus()) { - case 1: - break; - case 2: - end = index; - isBack = true; - break; - case 3: - sb.append(tagBegin).append(chars, start, index).append(tagEnd); - start = start + index; - index = 0; - end = 1; - isBack = false; - head = this.frontbegin; - break; - } - } - - return sb.toString(); - } - - /** - * Get content without break lines - * - * @param content - * @return - */ - public String getTagContent(String content) { - return getTagContentWithBr(content, false); - } - - -// public static void main(String[] args) { -// String[] keyWords = {"智能POS"}; -// for (int i = 0; i < 1; i++) { -// String str = new TagWordUtils("", "").addKeyWords(keyWords) -// .getTagContent("智能pos真的好"); -// System.out.println(str); +//package com.bfd.mf.common.util.utility; +// +//import org.apache.commons.collections.CollectionUtils; +// +//import java.lang.reflect.Array; +//import java.util.Arrays; +//import java.util.HashSet; +//import java.util.Iterator; +//import java.util.Set; +// +//public class TagWordUtils { +// private String tagBegin; +// private String tagEnd; +// Branch frontbegin = null; +// Set keyWords = new HashSet<>(); +// +// public Set getKeyWords() { +// return keyWords; +// } +// +// public String[] getKeyWordsArray() { +// if (CollectionUtils.isNotEmpty(keyWords)) { +// String[] keyWordsArray = new String[keyWords.size()]; +// return keyWords.toArray(keyWordsArray); +// } +// return null; +// } +// +// public TagWordUtils(String begin, String end) { +// this.tagBegin = begin; +// this.tagEnd = end; +// } +// +// public TagWordUtils addKeyWords(String[] keyWord) { +// if (keyWord.length > 0) { +// for (int i = 0; i < keyWord.length; i++) { +// this.keyWords.add(keyWord[i].trim()); +// } +// } +// return this; +// } +// +// // 是否发现词 +// boolean findWord = false; +// +// +// /** +// * Eric Added 2017-05-15 19:28:43 +// * If keep break lines \n in content +// * +// * @param content +// * @param keepBr +// * @return +// */ +// public String getTagContentWithBr(String content, boolean keepBr) { +// if (content == null || content.trim().length() == 0 +// || keyWords.size() == 0) { +// return content; +// } +// // 对原始高亮的信息,进行去掉标签的操作;并且进行去掉 \n \r 以及特殊字符 +// content = content.replaceAll(" ", "").replaceAll(" ", "").replaceAll("\t", "").replaceAll(" ", "") +// .replaceAll("  ", ""); +// if (!keepBr) { +// content = content.replaceAll("\r", "").replaceAll("\n", ""); +// } +// this.frontbegin = new MakeLibrary().getStringTree(this.keyWords); +// if (frontbegin == null) { +// return content; +// } +// char[] chars = content.toCharArray(); +// // 正文 +// StringBuilder sb = new StringBuilder(); +// +// WoodInterface head = this.frontbegin; +// int start = 0; +// int end = 1; +// int index = 0; +// boolean isBack = false; +// int length = chars.length; +// // 此处是正向最大匹配 +// for (int i = 0; i < length; i++) { +// index++; +// head = head.get(Character.toLowerCase(chars[i])); +// if (head == null) { +// if (isBack) { +// sb.append(tagBegin).append(chars, start, end).append(tagEnd); +// start = start + end; +// i = start - 1; +// isBack = false; +// } else { +// sb.append(chars, start, end); +// i = start; +// start++; +// } +// head = this.frontbegin; +// index = 0; +// end = 1; +// continue; +// } +// switch (head.getStatus()) { +// case 1: +// break; +// case 2: +// end = index; +// isBack = true; +// break; +// case 3: +// sb.append(tagBegin).append(chars, start, index).append(tagEnd); +// start = start + index; +// index = 0; +// end = 1; +// isBack = false; +// head = this.frontbegin; +// break; +// } +// } +// +// return sb.toString(); +// } +// +// /** +// * 外媒格式处理 +// * +// * @param content +// * @return +// */ +// public String getForeignTagContent(String content) { +// if (content == null || content.trim().length() == 0 +// || keyWords.size() == 0) { +// return content; +// } +// this.frontbegin = new MakeLibrary().getStringTree(this.keyWords); +// if (frontbegin == null) { +// return content; +// } +// char[] chars = content.toCharArray(); +// // 正文 +// StringBuilder sb = new StringBuilder(); +// +// WoodInterface head = this.frontbegin; +// int start = 0; +// int end = 1; +// int index = 0; +// boolean isBack = false; +// int length = chars.length; +// // 此处是正向最大匹配 +// for (int i = 0; i < length; i++) { +// index++; +// head = head.get(Character.toLowerCase(chars[i])); +// if (head == null) { +// if (isBack) { +// sb.append(tagBegin).append(chars, start, end).append(tagEnd); +// start = start + end; +// i = start - 1; +// isBack = false; +// } else { +// sb.append(chars, start, end); +// i = start; +// start++; +// } +// head = this.frontbegin; +// index = 0; +// end = 1; +// continue; +// } +// switch (head.getStatus()) { +// case 1: +// break; +// case 2: +// end = index; +// isBack = true; +// break; +// case 3: +// sb.append(tagBegin).append(chars, start, index).append(tagEnd); +// start = start + index; +// index = 0; +// end = 1; +// isBack = false; +// head = this.frontbegin; +// break; +// } +// } +// +// return sb.toString(); +// } +// +// /** +// * Get content without break lines +// * +// * @param content +// * @return +// */ +// public String getTagContent(String content) { +// return getTagContentWithBr(content, false); +// } +// +// +//// public static void main(String[] args) { +//// String[] keyWords = {"智能POS"}; +//// for (int i = 0; i < 1; i++) { +//// String str = new TagWordUtils("", "").addKeyWords(keyWords) +//// .getTagContent("智能pos真的好"); +//// System.out.println(str); +//// } +//// } +// +//} +// +//class MakeLibrary { +// +// public MakeLibrary() { +// } +// +// // 是否有下一个 +// private static boolean hasNext = true; +// // 是否是一个词 +// private static boolean isWords = true; +// +// Iterator it = null; +// +// public Branch getStringTree(Set keyWords) { +// it = keyWords.iterator(); +// Branch head = new Branch('h', 0, 0); +// Branch branch = head; +// +// while (it.hasNext()) { +// /** +// * 对于英文字母全部都全部转换成小写 +// */ +// char[] chars = it.next().toLowerCase().toCharArray(); +// for (int i = 0; i < chars.length; i++) { +// if (chars.length == (i + 1)) { +// isWords = true; +// hasNext = false; +// } else { +// isWords = false; +// hasNext = true; +// } +// int status = 1; +// if (isWords && hasNext) { +// status = 2; +// } +// if (!isWords && hasNext) { +// status = 1; +// } +// if (isWords && !hasNext) { +// status = 3; +// } +// branch.add(new Branch(chars[i], status, 0)); +// branch = (Branch) branch.get(chars[i]); +// } +// branch = head; +// } +// return head; +// } +//} +// +//interface WoodInterface { +// public WoodInterface add(WoodInterface branch); +// +// public WoodInterface get(char c); +// +// public boolean contains(char c); +// +// public int compareTo(char c); +// +// public boolean equals(char c); +// +// public byte getStatus(); +// +// public char getC(); +// +// public void setStatus(int status); +// +// public byte getNature(); +// +// public void setNature(byte nature); +//} +// +// +//class Branch implements WoodInterface { +// /** +// * status 此字的状态1,继续 2,是个词语但是还可以继续 ,3确定 +// * nature 词语性质 +// * 0.未知 . 1是姓 . 2 是职位名称 3 是数量级的词 . 4 是数字词语 5 是标点 +// */ +// WoodInterface[] branches = new WoodInterface[0]; +// private char c; +// // 状态 +// private byte status = 1; +// // 索引 +// private short index = -1; +// // 词性 +// private byte nature = 0; +// // 单独查找出来的对象 +// WoodInterface branch = null; +// +// public WoodInterface add(WoodInterface branch) { +// if ((this.branch = this.get(branch.getC())) != null) { +// switch (branch.getStatus()) { +// case 1: +// if (this.branch.getStatus() == 2) { +// this.branch.setStatus(2); +// } +// if (this.branch.getStatus() == 3) { +// this.branch.setStatus(2); +// } +// break; +// case 2: +// this.branch.setStatus(2); +// case 3: +// if (this.branch.getStatus() == 2) { +// this.branch.setStatus(2); +// } +// if (this.branch.getStatus() == 1) { +// this.branch.setStatus(2); +// } +// } +// this.branch.setNature(branch.getNature()); +// return this.branch; +// } +// index++; +// if ((index + 1) > branches.length) { +// branches = Arrays.copyOf(branches, index + 1); +// } +// branches[index] = branch; +// AnsjArrays.sort(branches); +// return branch; +// } +// +// public Branch(char c, int status, int nature) { +// this.c = c; +// this.status = (byte) status; +// this.nature = (byte) nature; +// } +// +// int i = 0; +// +// public WoodInterface get(char c) { +// int i = AnsjArrays.binarySearch(branches, c); +// if (i > -1) { +// return branches[i]; +// } +// return null; +// } +// +// public boolean contains(char c) { +// if (AnsjArrays.binarySearch(branches, c) > -1) { +// return true; +// } else { +// return false; +// } +// } +// +// public int compareTo(char c) { +// if (this.c > c) { +// return 1; +// } else if (this.c < c) { +// return -1; +// } else { +// return 0; +// } +// } +// +// public boolean equals(char c) { +// if (this.c == c) { +// return true; +// } else { +// return false; +// } +// } +// +// @Override +// public int hashCode() { +// // TODO Auto-generated method stub +// return c; +// } +// +// public byte getStatus() { +// return status; +// } +// +// public void setStatus(int status) { +// this.status = (byte) status; +// } +// +// public char getC() { +// return this.c; +// } +// +// public byte getNature() { +// return nature; +// } +// +// public void setNature(byte nature) { +// this.nature = nature; +// } +// +//} +// +//class AnsjArrays { +// private static final int INSERTIONSORT_THRESHOLD = 7; +// +// /** +// * 二分法查找.摘抄了jdk的东西..只不过把他的自动装箱功能给去掉了 +// */ +// public static int binarySearch(WoodInterface[] branches, char c) { +// int high = branches.length - 1; +// if (branches.length < 1) { +// return high; +// } +// int low = 0; +// while (low <= high) { +// int mid = (low + high) >>> 1; +// int cmp = branches[mid].compareTo(c); +// +// if (cmp < 0) { +// low = mid + 1; +// } else if (cmp > 0) { +// high = mid - 1; +// } else { +// return mid; // key found +// } +// } +// return -1; // key not found. +// } +// +// public static void sort(WoodInterface[] a) { +// WoodInterface[] aux = (WoodInterface[]) a.clone(); +// mergeSort(aux, a, 0, a.length, 0); +// } +// +// public static void sort(WoodInterface[] a, int fromIndex, int toIndex) { +// rangeCheck(a.length, fromIndex, toIndex); +// WoodInterface[] aux = copyOfRange(a, fromIndex, toIndex); +// mergeSort(aux, a, fromIndex, toIndex, -fromIndex); +// } +// +// private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) { +// if (fromIndex > toIndex) { +// throw new IllegalArgumentException("fromIndex(" + fromIndex +// + ") > toIndex(" + toIndex + ")"); +// } +// if (fromIndex < 0) { +// throw new ArrayIndexOutOfBoundsException(fromIndex); +// } +// if (toIndex > arrayLen) { +// throw new ArrayIndexOutOfBoundsException(toIndex); +// } +// } +// +// private static void mergeSort(WoodInterface[] src, WoodInterface[] dest, int low, +// int high, int off) { +// int length = high - low; +// +// // Insertion sort on smallest arrays +// if (length < INSERTIONSORT_THRESHOLD) { +// for (int i = low; i < high; i++) +// for (int j = i; j > low +// && (dest[j - 1]).compareTo(dest[j].getC()) > 0; j--) +// swap(dest, j, j - 1); +// return; +// } +// +// // Recursively sort halves of dest into src +// int destLow = low; +// int destHigh = high; +// low += off; +// high += off; +// int mid = (low + high) >>> 1; +// mergeSort(dest, src, low, mid, -off); +// mergeSort(dest, src, mid, high, -off); +// +// // If list is already sorted, just copy from src to dest. This is an +// // optimization that results in faster sorts for nearly ordered lists. +// if (src[mid - 1].compareTo(src[mid].getC()) <= 0) { +// System.arraycopy(src, low, dest, destLow, length); +// return; +// } +// +// // Merge sorted halves (now in src) into dest +// for (int i = destLow, p = low, q = mid; i < destHigh; i++) { +// if (q >= high || p < mid +// && src[p].compareTo(src[q].getC()) <= 0) { +// dest[i] = src[p++]; +// } else { +// dest[i] = src[q++]; +// } +// } +// } +// +// /** +// * Swaps x[a] with x[b]. +// */ +// private static void swap(WoodInterface[] x, int a, int b) { +// WoodInterface t = x[a]; +// x[a] = x[b]; +// x[b] = t; +// } +// +// public static T[] copyOfRange(T[] original, int from, int to) { +// return copyOfRange(original, from, to, (Class) original.getClass()); +// } +// +// public static T[] copyOfRange(U[] original, int from, int to, +// Class newType) { +// int newLength = to - from; +// if (newLength < 0) { +// throw new IllegalArgumentException(from + " > " + to); // } +// T[] copy = ((Object) newType == (Object) Object[].class) ? (T[]) new Object[newLength] +// : (T[]) Array +// .newInstance(newType.getComponentType(), newLength); +// System.arraycopy(original, from, copy, 0, Math.min(original.length +// - from, newLength)); +// return copy; // } - -} - -class MakeLibrary { - - public MakeLibrary() { - } - - // 是否有下一个 - private static boolean hasNext = true; - // 是否是一个词 - private static boolean isWords = true; - - Iterator it = null; - - public Branch getStringTree(Set keyWords) { - it = keyWords.iterator(); - Branch head = new Branch('h', 0, 0); - Branch branch = head; - - while (it.hasNext()) { - /** - * 对于英文字母全部都全部转换成小写 - */ - char[] chars = it.next().toLowerCase().toCharArray(); - for (int i = 0; i < chars.length; i++) { - if (chars.length == (i + 1)) { - isWords = true; - hasNext = false; - } else { - isWords = false; - hasNext = true; - } - int status = 1; - if (isWords && hasNext) { - status = 2; - } - if (!isWords && hasNext) { - status = 1; - } - if (isWords && !hasNext) { - status = 3; - } - branch.add(new Branch(chars[i], status, 0)); - branch = (Branch) branch.get(chars[i]); - } - branch = head; - } - return head; - } -} - -interface WoodInterface { - public WoodInterface add(WoodInterface branch); - - public WoodInterface get(char c); - - public boolean contains(char c); - - public int compareTo(char c); - - public boolean equals(char c); - - public byte getStatus(); - - public char getC(); - - public void setStatus(int status); - - public byte getNature(); - - public void setNature(byte nature); -} - - -class Branch implements WoodInterface { - /** - * status 此字的状态1,继续 2,是个词语但是还可以继续 ,3确定 - * nature 词语性质 - * 0.未知 . 1是姓 . 2 是职位名称 3 是数量级的词 . 4 是数字词语 5 是标点 - */ - WoodInterface[] branches = new WoodInterface[0]; - private char c; - // 状态 - private byte status = 1; - // 索引 - private short index = -1; - // 词性 - private byte nature = 0; - // 单独查找出来的对象 - WoodInterface branch = null; - - public WoodInterface add(WoodInterface branch) { - if ((this.branch = this.get(branch.getC())) != null) { - switch (branch.getStatus()) { - case 1: - if (this.branch.getStatus() == 2) { - this.branch.setStatus(2); - } - if (this.branch.getStatus() == 3) { - this.branch.setStatus(2); - } - break; - case 2: - this.branch.setStatus(2); - case 3: - if (this.branch.getStatus() == 2) { - this.branch.setStatus(2); - } - if (this.branch.getStatus() == 1) { - this.branch.setStatus(2); - } - } - this.branch.setNature(branch.getNature()); - return this.branch; - } - index++; - if ((index + 1) > branches.length) { - branches = Arrays.copyOf(branches, index + 1); - } - branches[index] = branch; - AnsjArrays.sort(branches); - return branch; - } - - public Branch(char c, int status, int nature) { - this.c = c; - this.status = (byte) status; - this.nature = (byte) nature; - } - - int i = 0; - - public WoodInterface get(char c) { - int i = AnsjArrays.binarySearch(branches, c); - if (i > -1) { - return branches[i]; - } - return null; - } - - public boolean contains(char c) { - if (AnsjArrays.binarySearch(branches, c) > -1) { - return true; - } else { - return false; - } - } - - public int compareTo(char c) { - if (this.c > c) { - return 1; - } else if (this.c < c) { - return -1; - } else { - return 0; - } - } - - public boolean equals(char c) { - if (this.c == c) { - return true; - } else { - return false; - } - } - - @Override - public int hashCode() { - // TODO Auto-generated method stub - return c; - } - - public byte getStatus() { - return status; - } - - public void setStatus(int status) { - this.status = (byte) status; - } - - public char getC() { - return this.c; - } - - public byte getNature() { - return nature; - } - - public void setNature(byte nature) { - this.nature = nature; - } - -} - -class AnsjArrays { - private static final int INSERTIONSORT_THRESHOLD = 7; - - /** - * 二分法查找.摘抄了jdk的东西..只不过把他的自动装箱功能给去掉了 - */ - public static int binarySearch(WoodInterface[] branches, char c) { - int high = branches.length - 1; - if (branches.length < 1) { - return high; - } - int low = 0; - while (low <= high) { - int mid = (low + high) >>> 1; - int cmp = branches[mid].compareTo(c); - - if (cmp < 0) { - low = mid + 1; - } else if (cmp > 0) { - high = mid - 1; - } else { - return mid; // key found - } - } - return -1; // key not found. - } - - public static void sort(WoodInterface[] a) { - WoodInterface[] aux = (WoodInterface[]) a.clone(); - mergeSort(aux, a, 0, a.length, 0); - } - - public static void sort(WoodInterface[] a, int fromIndex, int toIndex) { - rangeCheck(a.length, fromIndex, toIndex); - WoodInterface[] aux = copyOfRange(a, fromIndex, toIndex); - mergeSort(aux, a, fromIndex, toIndex, -fromIndex); - } - - private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) { - if (fromIndex > toIndex) { - throw new IllegalArgumentException("fromIndex(" + fromIndex - + ") > toIndex(" + toIndex + ")"); - } - if (fromIndex < 0) { - throw new ArrayIndexOutOfBoundsException(fromIndex); - } - if (toIndex > arrayLen) { - throw new ArrayIndexOutOfBoundsException(toIndex); - } - } - - private static void mergeSort(WoodInterface[] src, WoodInterface[] dest, int low, - int high, int off) { - int length = high - low; - - // Insertion sort on smallest arrays - if (length < INSERTIONSORT_THRESHOLD) { - for (int i = low; i < high; i++) - for (int j = i; j > low - && (dest[j - 1]).compareTo(dest[j].getC()) > 0; j--) - swap(dest, j, j - 1); - return; - } - - // Recursively sort halves of dest into src - int destLow = low; - int destHigh = high; - low += off; - high += off; - int mid = (low + high) >>> 1; - mergeSort(dest, src, low, mid, -off); - mergeSort(dest, src, mid, high, -off); - - // If list is already sorted, just copy from src to dest. This is an - // optimization that results in faster sorts for nearly ordered lists. - if (src[mid - 1].compareTo(src[mid].getC()) <= 0) { - System.arraycopy(src, low, dest, destLow, length); - return; - } - - // Merge sorted halves (now in src) into dest - for (int i = destLow, p = low, q = mid; i < destHigh; i++) { - if (q >= high || p < mid - && src[p].compareTo(src[q].getC()) <= 0) { - dest[i] = src[p++]; - } else { - dest[i] = src[q++]; - } - } - } - - /** - * Swaps x[a] with x[b]. - */ - private static void swap(WoodInterface[] x, int a, int b) { - WoodInterface t = x[a]; - x[a] = x[b]; - x[b] = t; - } - - public static T[] copyOfRange(T[] original, int from, int to) { - return copyOfRange(original, from, to, (Class) original.getClass()); - } - - public static T[] copyOfRange(U[] original, int from, int to, - Class newType) { - int newLength = to - from; - if (newLength < 0) { - throw new IllegalArgumentException(from + " > " + to); - } - T[] copy = ((Object) newType == (Object) Object[].class) ? (T[]) new Object[newLength] - : (T[]) Array - .newInstance(newType.getComponentType(), newLength); - System.arraycopy(original, from, copy, 0, Math.min(original.length - - from, newLength)); - return copy; - } -} +//} diff --git a/cl_search_api/src/main/resources/application.yml b/cl_search_api/src/main/resources/application.yml index 88c0d47..58a1028 100644 --- a/cl_search_api/src/main/resources/application.yml +++ b/cl_search_api/src/main/resources/application.yml @@ -14,13 +14,13 @@ server: spring: datasource: - driver-class-name: com.mysql.jdbc.Driver -# username: root -# password: Bfd123!@# -# url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + driver-class-name: com.mysql.cj.jdbc.Driver username: crawl - password: D5HLOvk553DUNV62qJI= - url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + password: crawl123 + url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC +# username: root +# password: bfd123 +# url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC hikari: maximum-pool-size: 10 minimum-idle: 1 @@ -47,8 +47,8 @@ bfd.api.mf: indexNamePre : cl_major_ es-mini: - name: SQ_Mini - address: 172.18.1.147:9313 + name: SQ_Mini_2 + address: 172.18.1.81:9301 upper: 2018-09-01 standby: cl_major_* es-normal: