commit 98d7406a99f5626f2cb1a5e145170a674654d820 Author: liushuang Date: Wed Mar 5 14:14:54 2025 +0800 init diff --git a/knows-java/pom.xml b/knows-java/pom.xml new file mode 100644 index 0000000..00bab2f --- /dev/null +++ b/knows-java/pom.xml @@ -0,0 +1,158 @@ + + + 4.0.0 + + org.springframework.boot + spring-boot-starter-parent + 3.3.2 + + + com.zhych + knows + 0.0.1-SNAPSHOT + embeddings + embeddings + + + 17 + + + + org.springframework.boot + spring-boot-starter-web + + + org.apache.commons + commons-lang3 + 3.12.0 + + + org.projectlombok + lombok + true + + + org.springframework.boot + spring-boot-starter-test + test + + + org.springframework.boot + spring-boot-starter-data-elasticsearch + + + com.alibaba + fastjson + 2.0.15 + compile + + + cn.hutool + hutool-all + 5.8.25 + + + com.squareup.okhttp3 + okhttp + 5.0.0-alpha.3 + + + org.apache.httpcomponents + httpclient + 4.5.13 + + + org.elasticsearch.client + elasticsearch-rest-high-level-client + 7.17.23 + + + co.elastic.clients + elasticsearch-java + 8.13.4 + + + com.fasterxml.jackson.core + jackson-databind + 2.15.2 + + + com.alibaba + dashscope-sdk-java + 2.8.3 + + + + org.apache.pdfbox + pdfbox + 2.0.24 + + + + net.sourceforge.tess4j + tess4j + 5.7.0 + + + + org.bytedeco + opencv-platform + 4.7.0-1.5.9 + + + + + org.apache.poi + poi + 5.2.3 + + + org.apache.poi + poi-ooxml + 5.2.3 + + + org.apache.poi + poi-scratchpad + 5.2.3 + + + + + + + src/main/resources + + **/* + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + + org.projectlombok + lombok + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.0.0-M5 + + + -Xmx2048m + -Djava.library.path=${project.basedir}/lib/opencv + + + + + + + diff --git a/knows-java/src/main/java/cn/luckday/Application.java b/knows-java/src/main/java/cn/luckday/Application.java new file mode 100644 index 0000000..fb425ff --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/Application.java @@ -0,0 +1,15 @@ +package cn.luckday; + +import 
org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.context.annotation.ComponentScan; + +@SpringBootApplication +@ComponentScan(value = {"cn.luckday.*"}) +public class Application { + + public static void main(String[] args) { + SpringApplication.run(Application.class, args); + } + +} diff --git a/knows-java/src/main/java/cn/luckday/bean/KnowsIndex.java b/knows-java/src/main/java/cn/luckday/bean/KnowsIndex.java new file mode 100644 index 0000000..88cdc3c --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/bean/KnowsIndex.java @@ -0,0 +1,21 @@ +package cn.luckday.bean; + +import lombok.Data; + +@Data +public class KnowsIndex { + + private String id; + + private String file_name; + + private String file_path; + + private String file_type; + + private String file_size; + + private String content; + + private double[] content_vec; +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/bean/SearchResult.java b/knows-java/src/main/java/cn/luckday/bean/SearchResult.java new file mode 100644 index 0000000..3111155 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/bean/SearchResult.java @@ -0,0 +1,13 @@ +package cn.luckday.bean; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class SearchResult { + private KnowsIndex knowsIndex; + private Double score; +} diff --git a/knows-java/src/main/java/cn/luckday/controller/KnowsController.java b/knows-java/src/main/java/cn/luckday/controller/KnowsController.java new file mode 100644 index 0000000..e37fa99 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/controller/KnowsController.java @@ -0,0 +1,112 @@ +package cn.luckday.controller; + +import cn.hutool.core.collection.CollUtil; +import cn.luckday.llm.QwenClient; +import com.alibaba.dashscope.aigc.generation.GenerationResult; +import 
com.alibaba.dashscope.exception.InputRequiredException; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import cn.luckday.bean.SearchResult; +import cn.luckday.embed.EmbedClient; +import cn.luckday.embed.ReRankClient; +import cn.luckday.llm.OllamaClient; +import cn.luckday.service.EsDocumentService; +import jakarta.annotation.Resource; +import jakarta.servlet.http.HttpServletResponse; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +@Slf4j +@RestController +@RequestMapping("/knows") +public class KnowsController { + + @Value("${embedding.uri}") + private String embeddingUri; + + @Value("${embedding.api-key}") + private String embeddingApiKey; + + @Value("${re-rank.uri}") + private String ReRankUri; + + @Value("${re-rank.api-key}") + private String ReRankApiKey; + + @Value("${oll.uri}") + private String ollUri; + + @Value("${qwen.api-key}") + private static String apiKey; + + @Value("${qwen.model}") + private static String model; + + @Resource + private EsDocumentService service; + + @PostMapping("/process") + public List process(@RequestBody Map dto) throws IOException { + String keyword = dto.get("keyword"); + return service.searchVector(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, keyword)); + } + + @PostMapping("/generate") + public void generate(HttpServletResponse response, @RequestBody Map dto) throws IOException, NoApiKeyException, InputRequiredException { + String keyword = dto.get("keyword"); + List searchResults = 
service.searchVector(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, keyword)); + List contents = searchResults.stream().map(searchResult -> searchResult.getKnowsIndex().getContent()).toList(); + log.info("搜索结果searchResults: {} ", contents); + + Object reRankPassages = ""; + if (CollUtil.isNotEmpty(searchResults)) { + // 重排处理 + List contentList = new ArrayList<>(); + searchResults.forEach(searchResult -> contentList.add(searchResult.getKnowsIndex().getContent())); + String reRank = ReRankClient.reRank(ReRankUri, ReRankApiKey, contentList, keyword); + log.info("重排结果reRank: {} ", reRank); + + JSONObject jsonObject = JSON.parseObject(reRank, JSONObject.class); + reRankPassages = jsonObject.get("rerank_passages"); + } + + // LLM总结回答 + OllamaClient.sendMsg(response, ollUri, keyword, reRankPassages.toString()); + } + + @PostMapping("/qwen-generate") + public String qwen(@RequestBody Map dto) throws IOException, NoApiKeyException, InputRequiredException { + String keyword = dto.get("keyword"); + List searchResults = service.searchVector(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, keyword)); + List contents = searchResults.stream().map(searchResult -> searchResult.getKnowsIndex().getContent()).toList(); + log.info("搜索结果searchResults: {} ", contents); + + Object reRankPassages = ""; + if (CollUtil.isNotEmpty(searchResults)) { + // 重排处理 + List contentList = new ArrayList<>(); + searchResults.forEach(searchResult -> contentList.add(searchResult.getKnowsIndex().getContent())); + String reRank = ReRankClient.reRank(ReRankUri, ReRankApiKey, contentList, keyword); + log.info("重排结果reRank: {} ", reRank); + + JSONObject jsonObject = JSON.parseObject(reRank, JSONObject.class); + reRankPassages = jsonObject.get("rerank_passages"); + } + + // LLM总结回答 + GenerationResult result = QwenClient.sendMsg(model, apiKey, keyword, reRankPassages.toString()); + String content = result.getOutput().getChoices().get(0).getMessage().getContent(); + log.info("千问: {}", content); + 
return content; + } +} diff --git a/knows-java/src/main/java/cn/luckday/controller/RedFileController.java b/knows-java/src/main/java/cn/luckday/controller/RedFileController.java new file mode 100644 index 0000000..6c15c13 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/controller/RedFileController.java @@ -0,0 +1,22 @@ +package cn.luckday.controller; + +import cn.luckday.service.RedFileService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.multipart.MultipartFile; +import java.util.*; + +@RestController +@RequestMapping("/api/file") +public class RedFileController { + + @Autowired + private RedFileService redFileService; + + @PostMapping("/upload") + public ResponseEntity uploadFile(@RequestParam("file") MultipartFile file) { + redFileService.uploadFile(file); + return ResponseEntity.ok(Map.of("message", "文件上传并解析成功")); + } +} diff --git a/knows-java/src/main/java/cn/luckday/document/Main.java b/knows-java/src/main/java/cn/luckday/document/Main.java new file mode 100644 index 0000000..5012274 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/document/Main.java @@ -0,0 +1,60 @@ +package cn.luckday.document; + +import java.io.File; +import java.util.List; +import java.awt.image.BufferedImage; + +public class Main { + public static void main(String[] args) { + try { + // 验证文件是否存在 + String pdfPath = "D:\\小红书文档\\中频\\运营经验库\\方法论\\PDF\\评论区和私信的互动指引的方法论.pdf"; + File pdfFile = new File(pdfPath); + if (!pdfFile.exists()) { + System.err.println("PDF文件不存在: " + pdfPath); + return; + } + + // 初始化PDFParser时添加错误处理 + PDFParser parser = new PDFParser(pdfPath); + try { + parser.parse(); + } catch (Exception e) { + System.err.println("PDF解析失败: " + e.getMessage()); + e.printStackTrace(); + } + + // 获取结果 + List texts = parser.getExtractedText(); + List images = parser.getExtractedImages(); + List tables = 
parser.getExtractedTables(); + + // // 处理Word文档 + // String wordPath = "D:\\小红书文档\\高频\\平台知识库\\已处理word\\新模式开票流程及注意事项.docx"; + // WordProcessor wordProcessor = new WordProcessor(wordPath); + // wordProcessor.process(); + // + // // 获取提取的文本 + // List textContent = wordProcessor.getExtractedText(); + // for (String text : textContent) { + // System.out.println(text); + // } + // + // // 处理表格 + // List tables = wordProcessor.getExtractedTables(); + // for (XWPFTable table : tables) { + // List> tableData = wordProcessor.convertTableToList(table); + // System.out.println("表格数据:" + tableData); + // + // // 导出表格为CSV + // wordProcessor.exportTableToCSV(table, "table_output.csv"); + // } + // + // // 保存图片 + // wordProcessor.saveImages("output_images"); + } catch (Exception e) { + System.err.println("程序执行出错: " + e.getMessage()); + e.printStackTrace(); + } + } +} diff --git a/knows-java/src/main/java/cn/luckday/document/OCRProcessor.java b/knows-java/src/main/java/cn/luckday/document/OCRProcessor.java new file mode 100644 index 0000000..abef4e3 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/document/OCRProcessor.java @@ -0,0 +1,180 @@ +package cn.luckday.document; + +import net.sourceforge.tess4j.Tesseract; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.Size; +import org.opencv.imgproc.Imgproc; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.awt.image.DataBufferByte; +import org.apache.pdfbox.pdmodel.PDDocument; + +public class OCRProcessor { + static { + try { + // 从资源目录加载本地库 + String libraryPath = OCRProcessor.class + .getClassLoader() + .getResource("native/" + System.mapLibraryName("opencv_java4110")) + .getPath(); + + System.load(libraryPath); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private final Tesseract tesseract; + + public OCRProcessor() { + tesseract = new Tesseract(); + initializeTesseract(); + } + + private void initializeTesseract() { + try { + // 设置Tesseract数据路径 + 
String tessdataPath = System.getenv("TESSDATA_PREFIX"); + if (tessdataPath == null || tessdataPath.isEmpty()) { + tessdataPath ="D:\\study\\backend\\embeddingstoes-master\\src\\main\\resources\\ocr"; + } + + tesseract.setDatapath(tessdataPath); + + // 修改:使用不依赖OSD的页面分割模式 + tesseract.setPageSegMode(3); + + // 设置语言包 + tesseract.setLanguage("chi_sim"); + + // 性能优化配置 + tesseract.setTessVariable("tessedit_create_pdf", "0"); + tesseract.setTessVariable("tessedit_create_hocr", "0"); + tesseract.setTessVariable("tessedit_write_images", "0"); + + } catch (Exception e) { + throw new RuntimeException("Tesseract 初始化失败: " + e.getMessage(), e); + } + } + + public String performOCR(BufferedImage image) { + try { + // 基本图像验证 + if (image == null || image.getWidth() < 10 || image.getHeight() < 10) { + throw new IllegalArgumentException("无效的图像"); + } + + // 预处理图像 + BufferedImage processedImage = preprocessImage(image); + + // 执行OCR + return tesseract.doOCR(processedImage); + + } catch (Exception e) { + System.err.println("OCR处理失败: " + e.getMessage()); + e.printStackTrace(); + return ""; + } + } + + private BufferedImage preprocessImage(BufferedImage image) { + try { + Mat mat = bufferedImageToMat(image); + + // 调整预处理步骤 + // 1. 转换为灰度图 + Mat gray = new Mat(); + Imgproc.cvtColor(mat, gray, Imgproc.COLOR_BGR2GRAY); + + // 2. 使用OTSU二值化替代自适应阈值 + Mat binary = new Mat(); + Imgproc.threshold(gray, binary, 0, 255, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU); + + // 3. 添加形态学操作 + Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3)); + Mat processed = new Mat(); + Imgproc.morphologyEx(binary, processed, Imgproc.MORPH_CLOSE, kernel); + + // 4. 
边缘增强 + Mat enhanced = new Mat(); + Imgproc.GaussianBlur(processed, enhanced, new Size(3, 3), 0); + + return matToBufferedImage(enhanced); + } catch (Exception e) { + e.printStackTrace(); + return image; + } + } + + private Mat bufferedImageToMat(BufferedImage image) { + // 转换图像类型为 TYPE_3BYTE_BGR,如果需要的话 + BufferedImage convertedImage = image; + if (image.getType() != BufferedImage.TYPE_3BYTE_BGR) { + convertedImage = new BufferedImage( + image.getWidth(), + image.getHeight(), + BufferedImage.TYPE_3BYTE_BGR); + convertedImage.getGraphics().drawImage(image, 0, 0, null); + } + + // 获取图像数据 + byte[] pixels = ((DataBufferByte) convertedImage.getRaster().getDataBuffer()).getData(); + + // 创建Mat对象 + Mat mat = new Mat( + convertedImage.getHeight(), + convertedImage.getWidth(), + CvType.CV_8UC3); + mat.put(0, 0, pixels); + + return mat; + } + + private BufferedImage matToBufferedImage(Mat mat) { + // 确保mat是8位3通道或单通道 + int type = BufferedImage.TYPE_3BYTE_BGR; + if (mat.channels() == 1) { + type = BufferedImage.TYPE_BYTE_GRAY; + } + + // 获取mat的数据 + byte[] pixels = new byte[mat.channels() * mat.cols() * mat.rows()]; + mat.get(0, 0, pixels); + + // 创建BufferedImage + BufferedImage image = new BufferedImage( + mat.cols(), + mat.rows(), + type); + + // 设置图像数据 + byte[] targetPixels = ((DataBufferByte) image.getRaster().getDataBuffer()).getData(); + System.arraycopy(pixels, 0, targetPixels, 0, pixels.length); + + return image; + } + + public void processPDF(String pdfPath) { + try { + // 添加内存使用监控 + Runtime runtime = Runtime.getRuntime(); + long maxMemory = runtime.maxMemory() / (1024 * 1024); + System.out.println("最大可用内存: " + maxMemory + "MB"); + + // 原有PDF处理代码 + PDDocument document = PDDocument.load(new File(pdfPath)); + // ... existing code ... 
+ + // 确保资源释放 + document.close(); + } catch (OutOfMemoryError e) { + System.err.println("内存不足: " + e.getMessage()); + // TODO 日志记录 + } catch (Exception e) { + System.err.println("处理PDF时发生错误: " + e.getMessage()); + e.printStackTrace(); + } + } +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/document/PDFParser.java b/knows-java/src/main/java/cn/luckday/document/PDFParser.java new file mode 100644 index 0000000..1ceffa1 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/document/PDFParser.java @@ -0,0 +1,137 @@ +package cn.luckday.document; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class PDFParser { + private final String pdfPath; + private PDDocument document; + private final OCRProcessor ocrProcessor; + private List extractedText; + private List extractedImages; + private List
extractedTables; + + public PDFParser(String pdfPath) { + this.pdfPath = pdfPath; + this.ocrProcessor = new OCRProcessor(); + this.extractedText = new ArrayList<>(); + this.extractedImages = new ArrayList<>(); + this.extractedTables = new ArrayList<>(); + } + + public void parse() { + try { + document = PDDocument.load(new File(pdfPath)); + + // 1. 解析文本内容 + System.out.println("=== 开始解析文本 ==="); + extractText(); + + // 2. 解析图片 + System.out.println("\n=== 开始解析图片 ==="); + extractImages(); + + // 3. 解析表格 +// System.out.println("\n=== 开始解析表格 ==="); +// extractTables(); + + document.close(); + } catch (Exception e) { + System.err.println("PDF解析失败: " + e.getMessage()); + e.printStackTrace(); + if (document != null) { + try { + document.close(); + } catch (IOException ignored) { + } + } + } + } + + private void extractText() throws IOException { + System.out.println("正在提取PDF文本..."); + + // 只使用PDFTextStripper提取文本 + PDFTextStripper stripper = new PDFTextStripper(); + String text = stripper.getText(document); + System.out.println("文本内容:\n" + text); + extractedText.add(text); + } + + private void extractImages() throws IOException { + System.out.println("正在提取并处理PDF图片..."); + int imageCounter = 0; + + for (PDPage page : document.getPages()) { + for (COSName name : page.getResources().getXObjectNames()) { + PDXObject object = page.getResources().getXObject(name); + if (object instanceof PDImageXObject) { + PDImageXObject image = (PDImageXObject) object; + BufferedImage bImage = image.getImage(); + + // 保存图片 + String imagePath = "output_images/extracted_image_" + imageCounter + ".png"; + ImageIO.write(bImage, "PNG", new File(imagePath)); + System.out.println("已保存图片: " + imagePath); + + // OCR处理图片 + try { + System.out.println("正在对图片 " + imageCounter + " 进行OCR处理..."); + String imageText = ocrProcessor.performOCR(bImage); + if (!imageText.trim().isEmpty()) { + System.out.println("图片 " + imageCounter + " OCR结果:\n" + imageText); + extractedText.add("【图片" + imageCounter + "文本】\n" + 
imageText); + } else { + System.out.println("图片 " + imageCounter + " 未识别出文本"); + } + } catch (Exception e) { + System.err.println("处理图片 " + imageCounter + " 时出错: " + e.getMessage()); + } + + extractedImages.add(bImage); + imageCounter++; + } + } + } + System.out.println("共处理 " + imageCounter + " 张图片"); + } + + private void extractTables() { + System.out.println("正在提取PDF表格..."); + TableDetector detector = new TableDetector(document); + extractedTables = detector.detectTables(); + + if (extractedTables.isEmpty()) { + System.out.println("未检测到表格"); + } else { + System.out.println("共检测到 " + extractedTables.size() + " 个表格"); + for (int i = 0; i < extractedTables.size(); i++) { + System.out.println("表格 " + (i + 1) + ":\n" + extractedTables.get(i)); + } + } + } + + // Getter方法 + public List getExtractedText() { + return extractedText; + } + + public List getExtractedImages() { + return extractedImages; + } + + public List
getExtractedTables() { + return extractedTables; + } +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/document/Table.java b/knows-java/src/main/java/cn/luckday/document/Table.java new file mode 100644 index 0000000..83d5ff1 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/document/Table.java @@ -0,0 +1,43 @@ +package cn.luckday.document; + +public class Table { + private String content; + private int rows; + private int columns; + + public Table(String content) { + this.content = content; + analyzeStructure(); + } + + private void analyzeStructure() { + if (content == null || content.isEmpty()) { + return; + } + + // 按行分割内容 + String[] lines = content.split("\n"); + rows = lines.length; + + // 分析列数(基于空格或制表符分隔) + columns = 0; + for (String line : lines) { + String[] cells = line.trim().split("\\s+"); + columns = Math.max(columns, cells.length); + } + } + + public int getRows() { + return rows; + } + + public int getColumns() { + return columns; + } + + @Override + public String toString() { + return String.format("Table{rows=%d, columns=%d, content='%s'}", + rows, columns, content); + } +} diff --git a/knows-java/src/main/java/cn/luckday/document/TableDetector.java b/knows-java/src/main/java/cn/luckday/document/TableDetector.java new file mode 100644 index 0000000..abf5d5c --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/document/TableDetector.java @@ -0,0 +1,170 @@ +package cn.luckday.document; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.text.PDFTextStripperByArea; + +import java.awt.Rectangle; +import java.util.ArrayList; +import java.util.List; + +public class TableDetector { + private final PDDocument document; + + public TableDetector(PDDocument document) { + this.document = document; + } + + public List
detectTables() { + List
tables = new ArrayList<>(); + try { + for (PDPage page : document.getPages()) { + // 使用文本位置分析来检测表格 + PDFTextStripperByArea stripper = new PDFTextStripperByArea(); + stripper.setSortByPosition(true); + + // 检测表格边界 + List tableRegions = detectTableRegions(page); + + for (Rectangle region : tableRegions) { + stripper.addRegion("table", region); + stripper.extractRegions(page); + String tableContent = stripper.getTextForRegion("table"); + tables.add(new Table(tableContent)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + return tables; + } + + private List detectTableRegions(PDPage page) { + List regions = new ArrayList<>(); + try { + // 获取页面尺寸 + float pageHeight = page.getMediaBox().getHeight(); + float pageWidth = page.getMediaBox().getWidth(); + + // 使用PDFTextStripperByArea进行文本分析 + PDFTextStripperByArea stripper = new PDFTextStripperByArea(); + stripper.setSortByPosition(true); + + // 将页面划分为网格进行分析 + int gridRows = 20; + int gridCols = 20; + float cellHeight = pageHeight / gridRows; + float cellWidth = pageWidth / gridCols; + + // 存储每个网格单元的文本密度 + int[][] textDensity = new int[gridRows][gridCols]; + + // 分析每个网格单元 + for (int row = 0; row < gridRows; row++) { + for (int col = 0; col < gridCols; col++) { + Rectangle cell = new Rectangle( + (int) (col * cellWidth), + (int) (row * cellHeight), + (int) cellWidth, + (int) cellHeight); + + stripper.addRegion("cell_" + row + "_" + col, cell); + stripper.extractRegions(page); + String cellText = stripper.getTextForRegion("cell_" + row + "_" + col); + + // 计算文本密度 + textDensity[row][col] = cellText.trim().length(); + } + } + + // 检测表格区域 + List potentialTables = findPotentialTables(textDensity, gridRows, gridCols); + + // 转换检测到的区域为实际坐标 + for (TableRegion tableRegion : potentialTables) { + Rectangle rect = new Rectangle( + (int) (tableRegion.startCol * cellWidth), + (int) (tableRegion.startRow * cellHeight), + (int) ((tableRegion.endCol - tableRegion.startCol + 1) * cellWidth), + (int) ((tableRegion.endRow - 
tableRegion.startRow + 1) * cellHeight)); + regions.add(rect); + } + + } catch (Exception e) { + e.printStackTrace(); + } + return regions; + } + + private List findPotentialTables(int[][] textDensity, int rows, int cols) { + List tables = new ArrayList<>(); + boolean[][] visited = new boolean[rows][cols]; + + // 遍历网格寻找潜在的表格区域 + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + if (!visited[i][j] && isTableCell(textDensity, i, j)) { + TableRegion region = new TableRegion(); + expandTableRegion(textDensity, visited, i, j, region); + if (isValidTable(region)) { + tables.add(region); + } + } + } + } + return tables; + } + + private boolean isTableCell(int[][] density, int row, int col) { + // 判断是否为表格单元格的条件 + // 1. 文本密度适中 + // 2. 周围有类似的文本密度分布 + int cellDensity = density[row][col]; + return cellDensity > 0 && cellDensity < 100; // 可调整阈值 + } + + private void expandTableRegion(int[][] density, boolean[][] visited, + int row, int col, TableRegion region) { + if (row < 0 || row >= density.length || + col < 0 || col >= density[0].length || + visited[row][col] || + !isTableCell(density, row, col)) { + return; + } + + visited[row][col] = true; + + // 更新表格区域的边界 + region.updateBounds(row, col); + + // 递归检查相邻单元格 + expandTableRegion(density, visited, row - 1, col, region); // 上 + expandTableRegion(density, visited, row + 1, col, region); // 下 + expandTableRegion(density, visited, row, col - 1, region); // 左 + expandTableRegion(density, visited, row, col + 1, region); // 右 + } + + private boolean isValidTable(TableRegion region) { + // 验证检测到的区域是否可能是表格 + int width = region.endCol - region.startCol + 1; + int height = region.endRow - region.startRow + 1; + + // 表格至少应该有2x2的大小 + return width >= 2 && height >= 2; + } + + // 表格区域数据结构 + private static class TableRegion { + int startRow = Integer.MAX_VALUE; + int startCol = Integer.MAX_VALUE; + int endRow = Integer.MIN_VALUE; + int endCol = Integer.MIN_VALUE; + + void updateBounds(int row, int col) { + startRow = 
Math.min(startRow, row); + startCol = Math.min(startCol, col); + endRow = Math.max(endRow, row); + endCol = Math.max(endCol, col); + } + } +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/document/WordProcessor.java b/knows-java/src/main/java/cn/luckday/document/WordProcessor.java new file mode 100644 index 0000000..ed9efa2 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/document/WordProcessor.java @@ -0,0 +1,287 @@ +package cn.luckday.document; + +import org.apache.poi.xwpf.usermodel.*; +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.hwpf.usermodel.Table; +import org.apache.poi.hwpf.usermodel.TableRow; +import org.apache.poi.hwpf.usermodel.TableCell; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.poi.common.usermodel.PictureType; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class WordProcessor { + private final String filePath; + private List extractedText; + private List extractedTables; + private List extractedImages; + + public WordProcessor(String filePath) { + this.filePath = filePath; + this.extractedText = new ArrayList<>(); + this.extractedTables = new ArrayList<>(); + this.extractedImages = new ArrayList<>(); + } + + public void process() { + File file = new File(filePath); + if (filePath.endsWith(".docx")) { + processDocx(file); + } else if (filePath.endsWith(".doc")) { + processDoc(file); + } else { + throw new IllegalArgumentException("不支持的文件格式:" + filePath); + } + } + + private void processDocx(File file) { + try (FileInputStream fis = new FileInputStream(file); + XWPFDocument document = new XWPFDocument(fis)) { + + // 提取文本 + extractTextFromDocx(document); + + // 提取表格 + extractTablesFromDocx(document); + + // 提取图片 + 
extractImagesFromDocx(document); + + } catch (IOException e) { + e.printStackTrace(); + } + } + + private void processDoc(File file) { + try (FileInputStream fis = new FileInputStream(file); + POIFSFileSystem fs = new POIFSFileSystem(fis)) { + + HWPFDocument document = new HWPFDocument(fs); + + // 提取文本 + Range range = document.getRange(); + extractTextFromDoc(range); + + // 提取表格 + extractTablesFromDoc(range); + + // 提取图片(如果可能) + extractImagesFromDoc(document); + + } catch (IOException e) { + e.printStackTrace(); + } + } + + private void extractTextFromDocx(XWPFDocument document) { + // 提取段落文本 + for (XWPFParagraph paragraph : document.getParagraphs()) { + String text = paragraph.getText().trim(); + if (!text.isEmpty()) { + extractedText.add(text); + } + } + } + + private void extractTablesFromDocx(XWPFDocument document) { + // 提取表格 + for (XWPFTable table : document.getTables()) { + extractedTables.add(table); + + // 处理表格内容 + for (XWPFTableRow row : table.getRows()) { + StringBuilder rowContent = new StringBuilder(); + for (XWPFTableCell cell : row.getTableCells()) { + rowContent.append(cell.getText()).append("\t"); + } + extractedText.add("表格行:" + rowContent.toString().trim()); + } + } + } + + private void extractImagesFromDocx(XWPFDocument document) { + // 提取图片 + for (XWPFParagraph paragraph : document.getParagraphs()) { + for (XWPFRun run : paragraph.getRuns()) { + List pictures = run.getEmbeddedPictures(); + extractedImages.addAll(pictures); + } + } + } + + private void extractTextFromDoc(Range range) { + String text = range.text(); + // 按段落分割 + String[] paragraphs = text.split("\\r?\\n"); + for (String paragraph : paragraphs) { + if (!paragraph.trim().isEmpty()) { + extractedText.add(paragraph.trim()); + } + } + } + + private void extractTablesFromDoc(Range range) { + for (int i = 0; i < range.numParagraphs(); i++) { + if (range.getParagraph(i).isInTable()) { + Table table = range.getTable(range.getParagraph(i)); + processDocTable(table); + // 跳过表格中的其他段落 + i += 
table.numParagraphs() - 1; + } + } + } + + private void processDocTable(Table table) { + List> tableData = new ArrayList<>(); + for (int rowIdx = 0; rowIdx < table.numRows(); rowIdx++) { + TableRow row = table.getRow(rowIdx); + List rowData = new ArrayList<>(); + + for (int colIdx = 0; colIdx < row.numCells(); colIdx++) { + TableCell cell = row.getCell(colIdx); + String cellText = cell.text().trim(); + if (cellText.endsWith("\u0007")) { + cellText = cellText.substring(0, cellText.length() - 1); + } + rowData.add(cellText); + } + + tableData.add(rowData); + extractedText.add("表格行:" + String.join("\t", rowData)); + } + } + + private void extractImagesFromDoc(HWPFDocument document) { + // 注意:HWPF对图片的支持有限 + try { + List pictures = document.getPicturesTable().getAllPictures(); + File outputDir = new File("output_images"); + if (!outputDir.exists()) { + outputDir.mkdirs(); + } + + int imageCounter = 0; + for (org.apache.poi.hwpf.usermodel.Picture picture : pictures) { + String extension = picture.suggestFileExtension(); + String filename = String.format("doc_image_%d.%s", imageCounter++, extension); + Path outputPath = Paths.get(outputDir.getPath(), filename); + + // 保存图片数据 + Files.write(outputPath, picture.getContent()); + } + } catch (Exception e) { + System.out.println("警告:提取.doc文件中的图片时出错:" + e.getMessage()); + } + } + + public void saveImages(String outputDir) { + try { + File dir = new File(outputDir); + if (!dir.exists()) { + dir.mkdirs(); + } + + int imageCounter = 0; + for (XWPFPicture picture : extractedImages) { + // 获取图片数据 + byte[] pictureData = picture.getPictureData().getData(); + + // 确定图片扩展名 + String extension = getImageExtension(picture.getPictureData().getPictureType()); + String filename = String.format("image_%d.%s", imageCounter++, extension); + + // 保存图片 + Path outputPath = Paths.get(dir.getPath(), filename); + Files.write(outputPath, pictureData); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + private String getImageExtension(int 
pictureType) { + // 使用PictureType的常量来处理图片类型 + if (pictureType == PictureType.PNG.getOoxmlId()) { + return "png"; + } else if (pictureType == PictureType.JPEG.getOoxmlId()) { + return "jpg"; + } else if (pictureType == PictureType.GIF.getOoxmlId()) { + return "gif"; + } else if (pictureType == PictureType.TIFF.getOoxmlId()) { + return "tiff"; + } else if (pictureType == PictureType.BMP.getOoxmlId()) { + return "bmp"; + } else if (pictureType == PictureType.EMF.getOoxmlId()) { + return "emf"; + } else if (pictureType == PictureType.WMF.getOoxmlId()) { + return "wmf"; + } else if (pictureType == PictureType.PICT.getOoxmlId()) { + return "pict"; + } else if (pictureType == PictureType.DIB.getOoxmlId()) { + return "dib"; + } else { + return "unknown"; + } + } + + public List getExtractedText() { + return extractedText; + } + + public List getExtractedTables() { + return extractedTables; + } + + public List getExtractedImages() { + return extractedImages; + } + + // 将表格转换为结构化数据 + public List> convertTableToList(XWPFTable table) { + List> tableData = new ArrayList<>(); + + for (XWPFTableRow row : table.getRows()) { + List rowData = new ArrayList<>(); + for (XWPFTableCell cell : row.getTableCells()) { + rowData.add(cell.getText().trim()); + } + tableData.add(rowData); + } + + return tableData; + } + + // 导出表格为CSV格式 + public void exportTableToCSV(XWPFTable table, String outputPath) { + try { + StringBuilder csv = new StringBuilder(); + + for (XWPFTableRow row : table.getRows()) { + List rowData = new ArrayList<>(); + for (XWPFTableCell cell : row.getTableCells()) { + // 处理CSV中的特殊字符 + String cellText = cell.getText().trim() + .replace("\"", "\"\"") + .replace(",", "\",\""); + rowData.add("\"" + cellText + "\""); + } + csv.append(String.join(",", rowData)).append("\n"); + } + + java.nio.file.Files.write( + new File(outputPath).toPath(), + csv.toString().getBytes()); + + } catch (IOException e) { + e.printStackTrace(); + } + } +} \ No newline at end of file diff --git 
a/knows-java/src/main/java/cn/luckday/embed/EmbedClient.java b/knows-java/src/main/java/cn/luckday/embed/EmbedClient.java new file mode 100644 index 0000000..8f9c3f8 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/embed/EmbedClient.java @@ -0,0 +1,83 @@ +package cn.luckday.embed; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import okhttp3.*; + +import java.io.IOException; +import java.util.*; + +public class EmbedClient { + + public static double[] getEmbedding(String uri, String apiKey, String inputText) throws IOException { + OkHttpClient client = new OkHttpClient(); + + // 创建请求体 + JSONObject requestBody = new JSONObject(); + requestBody.put("input", Collections.singletonList(inputText)); + + // 创建请求 + MediaType mediaType = MediaType.parse("application/json; charset=utf-8"); + RequestBody body = RequestBody.Companion.create(requestBody.toJSONString(), mediaType); + Request request = new Request.Builder() + .url(uri) + .addHeader("Authorization", "Bearer " + apiKey) + .addHeader("Content-Type", "application/json") + .post(body) + .build(); + + // 发送请求 + Response response = client.newCall(request).execute(); + if (!response.isSuccessful()) { + throw new IOException("Unexpected code " + response); + } + + // 解析JSON响应 + String responseBody = response.body().string(); + EmbeddingResponse embeddingResponse = JSON.parseObject(responseBody, EmbeddingResponse.class); + + // 返回嵌入向量 + return embeddingResponse.getData().get(0).getEmbedding(); + } + + static class EmbeddingResponse { + private List data; + + public List getData() { + return data; + } + + public void setData(List data) { + this.data = data; + } + } + + static class Data { + private double[] embedding; + private int index; + private String object; + + public double[] getEmbedding() { + return embedding; + } + + public void setEmbedding(double[] embedding) { + this.embedding = embedding; + } + + public int getIndex() { + return index; + } + + public void setIndex(int 
index) { + this.index = index; + } + + public String getObject() { + return object; + } + + public void setObject(String object) { + this.object = object; + } + } +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/embed/ReRankClient.java b/knows-java/src/main/java/cn/luckday/embed/ReRankClient.java new file mode 100644 index 0000000..0752ab6 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/embed/ReRankClient.java @@ -0,0 +1,34 @@ +package cn.luckday.embed; + +import com.alibaba.fastjson.JSONObject; +import okhttp3.*; + +import java.io.IOException; +import java.util.List; + +public class ReRankClient { + + public static String reRank(String uri, String apiKey, List textsList, String query) throws IOException { + OkHttpClient client = new OkHttpClient(); + JSONObject requestBody = new JSONObject(); + String[] texts = textsList.toArray(new String[0]); + requestBody.put("textList", texts); + requestBody.put("query", query); + // 创建请求 + MediaType mediaType = MediaType.parse("application/json; charset=utf-8"); + RequestBody body = RequestBody.Companion.create(requestBody.toJSONString(), mediaType); + Request request = new Request.Builder() + .url(uri) + .addHeader("Authorization", "Bearer " + apiKey) + .addHeader("Content-Type", "application/json") + .post(body) + .build(); + + // 发送请求 + Response response = client.newCall(request).execute(); + if (!response.isSuccessful()) { + throw new IOException("Unexpected code " + response); + } + return response.body().string(); + } +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/filter/AccessControlFilter.java b/knows-java/src/main/java/cn/luckday/filter/AccessControlFilter.java new file mode 100644 index 0000000..d2ebb6a --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/filter/AccessControlFilter.java @@ -0,0 +1,46 @@ +package cn.luckday.filter; + +import jakarta.servlet.*; +import jakarta.servlet.annotation.WebFilter; +import 
jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import java.io.IOException; + +@Component +@WebFilter(urlPatterns = "/*", asyncSupported = true) +@Order(1) +public class AccessControlFilter implements Filter { + + @Override + public void init(FilterConfig filterConfig) throws ServletException { + } + + @Override + public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { + HttpServletRequest httpServletRequest = (HttpServletRequest) request; + HttpServletResponse httpServletResponse = (HttpServletResponse) response; + + // 获取源站 + String origin = httpServletRequest.getHeader("origin"); + httpServletResponse.setHeader("Access-Control-Allow-Origin", "*"); + httpServletResponse.setHeader("Access-Control-Allow-Headers", "Content-Type,Content-Length, Authorization, Accept,X-Requested-With,cors, content-type, luck-token, userId, user, type"); + httpServletResponse.setHeader("Access-Control-Allow-Credentials", "true"); + httpServletResponse.setHeader("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,PATCH,OPTIONS"); + httpServletResponse.setHeader("Access-Control-Max-Age", "3600"); + + if ("OPTIONS".equals(httpServletRequest.getMethod())) { + httpServletResponse.setStatus(HttpServletResponse.SC_OK); + } else { + chain.doFilter(request, response); + } + } + + @Override + public void destroy() { + + } + +} diff --git a/knows-java/src/main/java/cn/luckday/llm/OllamaClient.java b/knows-java/src/main/java/cn/luckday/llm/OllamaClient.java new file mode 100644 index 0000000..cb53696 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/llm/OllamaClient.java @@ -0,0 +1,85 @@ +package cn.luckday.llm; + +import com.alibaba.fastjson2.JSON; +import jakarta.servlet.http.HttpServletResponse; + +import java.io.*; +import java.net.HttpURLConnection; +import 
java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.HashMap; + +public class OllamaClient { + + private static Map PARAMS = new HashMap<>(); + private static Map OPTIONS = new HashMap<>(); + + static { + OPTIONS.put("temperature", 0.3); // # 控制随机性(0-1,值越大越随机) + OPTIONS.put("top_p", 0.5); // # 采样策略(0-1,值越小越集中) + OPTIONS.put("max_tokens", 1024); // # 生成的最大 token 数 + + PARAMS.put("model", "deepseek-r1:32b"); + PARAMS.put("stream", true); + PARAMS.put("options", OPTIONS); + } + + public static String PROMPT = "你是一个知识库,必须严格按照知识库检索的内容做最精简的回答,只回答关键信息,坚决杜绝胡编乱造,注意字数。" + + "当所有知识库内容都与产品问题无关时,或者知识库检索到任何相关信息时,你的回答必须是“没有找到”这句话。" + + " 以下是知识库:\n" + + " { %content% }\n" + + " 以上是知识库。 \n 以下是提问:"; + + public static void sendMsg(HttpServletResponse response, String uri, String query, String content) { + try { + // 设置SSE必要的响应头 + response.setContentType("text/event-stream"); + response.setCharacterEncoding("UTF-8"); + response.setHeader("Cache-Control", "no-cache"); + response.setHeader("Connection", "keep-alive"); + + URL url = new URL(uri); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("POST"); + conn.setRequestProperty("Accept", "text/event-stream"); + conn.setRequestProperty("Content-Type", "application/json"); + conn.setDoOutput(true); + + PARAMS.put("prompt", PROMPT.replace("%content%", content) + query); + String json = JSON.toJSONString(PARAMS); + + try (OutputStream os = conn.getOutputStream()) { + os.write(json.getBytes(StandardCharsets.UTF_8)); + } + + int responseCode = conn.getResponseCode(); + + if (responseCode >= HttpURLConnection.HTTP_OK && responseCode < HttpURLConnection.HTTP_USE_PROXY) { + try (BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8)); + PrintWriter writer = response.getWriter()) { + + String line; + while ((line = br.readLine()) != null) { + if (!line.trim().isEmpty()) { + // 构造SSE消息格式 + 
writer.write("data: " + line + "\n\n"); + writer.flush(); + } + } + } + } else { + throw new RuntimeException("Failed : HTTP error code : " + responseCode); + } + } catch (Exception e) { + try { + response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + PrintWriter writer = response.getWriter(); + writer.write("data: {\"error\": \"" + e.getMessage() + "\"}\n\n"); + writer.flush(); + } catch (IOException ioe) { + e.printStackTrace(); + } + } + } +} diff --git a/knows-java/src/main/java/cn/luckday/llm/QwenClient.java b/knows-java/src/main/java/cn/luckday/llm/QwenClient.java new file mode 100644 index 0000000..70a4e04 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/llm/QwenClient.java @@ -0,0 +1,45 @@ +package cn.luckday.llm; + +import java.util.Arrays; +import com.alibaba.dashscope.aigc.generation.Generation; +import com.alibaba.dashscope.aigc.generation.GenerationParam; +import com.alibaba.dashscope.aigc.generation.GenerationResult; +import com.alibaba.dashscope.common.Message; +import com.alibaba.dashscope.common.Role; +import com.alibaba.dashscope.exception.ApiException; +import com.alibaba.dashscope.exception.InputRequiredException; +import com.alibaba.dashscope.exception.NoApiKeyException; + +public class QwenClient { + + public static GenerationResult sendMsg(String model, String apiKey, String query, String content) throws ApiException, NoApiKeyException, InputRequiredException { + Generation gen = new Generation(); + + Message systemMsg = Message.builder() + .role(Role.SYSTEM.getValue()) + .content("你是一个知识库,必须严格按照知识库检索的内容做最精简的回答,只回答关键信息,坚决杜绝胡编乱造,注意数字。" + + "当所有知识库内容都与产品问题无关时,或者知识库检索到任何相关信息时,你的回答必须是“没有找到”这句话。" + + " 以下是知识库:\n" + + " {" + content + "}\n" + + " 以上是知识库。") + .build(); + + Message userMsg = Message.builder() + .role(Role.USER.getValue()) + .content(query) + .build(); + + GenerationParam param = GenerationParam.builder() + .model(model) + .messages(Arrays.asList(systemMsg, userMsg)) + 
.resultFormat(GenerationParam.ResultFormat.MESSAGE) + .apiKey(apiKey) + .topK(50) + .temperature(0.1f) + .topP(0.8) + .seed(1234) + .build(); + + return gen.call(param); + } +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/service/EsDocumentService.java b/knows-java/src/main/java/cn/luckday/service/EsDocumentService.java new file mode 100644 index 0000000..f883d1f --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/service/EsDocumentService.java @@ -0,0 +1,143 @@ +package cn.luckday.service; + + +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.elasticsearch._types.Script; +import co.elastic.clients.elasticsearch._types.query_dsl.*; +import co.elastic.clients.elasticsearch.core.IndexResponse; +import co.elastic.clients.elasticsearch.core.SearchResponse; +import co.elastic.clients.elasticsearch.indices.CreateIndexRequest; +import co.elastic.clients.elasticsearch.indices.CreateIndexResponse; +import co.elastic.clients.json.JsonData; +import cn.luckday.bean.SearchResult; +import cn.luckday.bean.KnowsIndex; +import cn.luckday.embed.EmbedClient; +import jakarta.annotation.Resource; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.io.IOException; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +@Slf4j +@Service +public class EsDocumentService { + + @Value("${embedding.uri}") + private String embeddingUri; + + @Value("${embedding.api-key}") + private String embeddingApiKey; + + @Resource + private ElasticsearchClient client; + + public static final String INDEX_NAME = "knows_index"; + + public static final float SIMILARITY_THRESHOLD = 0.2f; + + /** + * 创建索引 + * @throws IOException 异常 + */ + public void createIndex() throws IOException { + CreateIndexRequest request = new CreateIndexRequest.Builder() 
+ .index(INDEX_NAME) + + .mappings(m -> m + .properties("file_name", p -> p.keyword(k -> k)) + .properties("file_path", p -> p.keyword(k -> k)) + .properties("file_type", p -> p.keyword(k -> k)) + .properties("file_size", p -> p.keyword(k -> k)) + .properties("remark_vec", p -> p + .denseVector(dv -> dv + .dims(1024) + .index(true) + .similarity("cosine") + ) + ) + .properties("remark", p -> p + .text(t -> t) + ) +// .properties("remark", p -> p +// .text(t -> t.searchAnalyzer("ik_smart") +// .analyzer("ik_smart") // 使用 IK 分词器 +// ) +// ) + ) + .build(); + + CreateIndexResponse createIndexResponse = client.indices().create(request); + log.info("Index created: {}", createIndexResponse.acknowledged()); + } + + /** + * 添加数据 + * @param knowsIndexList 数据 + * @throws IOException 异常 + */ + public void indexSellList(List knowsIndexList) throws IOException { + for (KnowsIndex knowsIndex : knowsIndexList) { + knowsIndex.setContent_vec(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, knowsIndex.getContent())); + IndexResponse response = client.index(i -> i + .index(INDEX_NAME) + .id(knowsIndex.getId()) + .document(knowsIndex) + ); + log.info("Sell indexed: {}", response.id()); + } + } + + + /** + * 检索 + * + * @param queryVector 向量 + */ + public List searchVector(double[] queryVector) throws IOException { + // 创建向量相似度查询 + ScriptScoreQuery scriptScoreQuery = ScriptScoreQuery.of(q -> q + .query(QueryBuilders.matchAll().build()._toQuery()) + .script(Script.of(s -> s.inline(i -> i + .source("double score = cosineSimilarity(params.query_vector, 'content_vec'); " + + "score = Math.min(1.0, Math.max(0.0, score)); " + // 确保评分在[0, 1]之间 + "if (score < params.threshold) { return 0; } else { return score; }") + .params(Map.of( + "query_vector", JsonData.of(queryVector), + "threshold", JsonData.of(SIMILARITY_THRESHOLD) // 将阈值作为参数传递给脚本 + )))))); + + // 创建bool查询,向量相似度查询作为should子句 + Query boolQuery = QueryBuilders.bool(b -> b + .should(scriptScoreQuery._toQuery()) + ); + + Query 
functionScoreQuery = QueryBuilders.functionScore(fs -> fs + .query(boolQuery) + .scoreMode(FunctionScoreMode.Max) + .boostMode(FunctionBoostMode.Replace) + .minScore((double) SIMILARITY_THRESHOLD) + ); + + // 执行合并后的查询 + SearchResponse combinedSearchResponse = client.search(s -> s + .index(INDEX_NAME) + .query(functionScoreQuery), + KnowsIndex.class); + + // 处理查询的结果 + return combinedSearchResponse.hits().hits().stream() + .map(hit -> { + double finalScore = Objects.nonNull(hit.score()) ? hit.score() : 0.0; + return finalScore >= SIMILARITY_THRESHOLD ? new SearchResult(hit.source(), finalScore) : null; + }) + .filter(Objects::nonNull) + .sorted(Comparator.comparingDouble(SearchResult::getScore).reversed()) + .collect(Collectors.toList()); + } +} \ No newline at end of file diff --git a/knows-java/src/main/java/cn/luckday/service/RedFileService.java b/knows-java/src/main/java/cn/luckday/service/RedFileService.java new file mode 100644 index 0000000..9648952 --- /dev/null +++ b/knows-java/src/main/java/cn/luckday/service/RedFileService.java @@ -0,0 +1,135 @@ +package cn.luckday.service; + +import cn.hutool.core.util.IdUtil; +import cn.luckday.bean.KnowsIndex; +import cn.luckday.embed.EmbedClient; +import cn.luckday.document.PDFParser; +import cn.luckday.document.WordProcessor; +import jakarta.annotation.Resource; +import org.apache.poi.xwpf.usermodel.XWPFPicture; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Service +public class RedFileService { + private static final String TEMP_DIR = "src/main/resources/temp_uploads"; + + 
@Value("${embedding.uri}") + private String embeddingUri; + + @Value("${embedding.api-key}") + private String embeddingApiKey; + + @Resource + private EsDocumentService esDocumentService; + + public void uploadFile(MultipartFile file) { + try { + String projectPath = System.getProperty("user.dir"); + Path tempDirPath = Paths.get(projectPath, TEMP_DIR); + if (!Files.exists(tempDirPath)) { + Files.createDirectories(tempDirPath); + } + + // 获取文件名和扩展名 + String originalFilename = file.getOriginalFilename(); + String fileExtension = getFileExtension(originalFilename); + + // 生成临时文件路径 + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")); + String tempFileName = timestamp + "_" + originalFilename; + Path tempFilePath = Paths.get(projectPath, TEMP_DIR, tempFileName); + + // 保存上传的文件 + file.transferTo(tempFilePath.toFile()); + + // 解析文件内容 + Map parsedContent = parseFile(tempFilePath.toString(), fileExtension); + + // 保存到 Elasticsearch + String text = parsedContent.get("text").toString(); + + KnowsIndex knowsIndex = new KnowsIndex(); + knowsIndex.setId(String.valueOf(IdUtil.getSnowflakeNextId())); + knowsIndex.setContent(text); + knowsIndex.setContent_vec(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, text)); + esDocumentService.indexSellList(Arrays.asList(knowsIndex)); + + // 清理临时文件 + Files.deleteIfExists(tempFilePath); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + private String getFileExtension(String filename) { + if (filename == null) + return ""; + int lastDotIndex = filename.lastIndexOf('.'); + return (lastDotIndex == -1) ? 
"" : filename.substring(lastDotIndex + 1).toLowerCase(); + } + + private Map parseFile(String filePath, String extension) throws Exception { + Map content = new HashMap<>(); + + switch (extension) { + case "pdf": + PDFParser pdfParser = new PDFParser(filePath); + pdfParser.parse(); + + // 获取解析结果 + List texts = pdfParser.getExtractedText(); + List images = pdfParser.getExtractedImages(); + + // 合并所有文本 + StringBuilder fullText = new StringBuilder(); + for (String text : texts) { + fullText.append(text).append("\n"); + } + + content.put("text", fullText.toString()); + content.put("imageCount", images.size()); + break; + + case "docx": + WordProcessor wordProcessor = new WordProcessor(filePath); + wordProcessor.process(); + + List extractedText = wordProcessor.getExtractedText(); + // 合并所有文本 + StringBuilder docxFullText = new StringBuilder(); + for (String text : extractedText) { + docxFullText.append(text).append("\n"); + } + + List extractedImages = wordProcessor.getExtractedImages(); + content.put("text", docxFullText.toString()); + content.put("imageCount", extractedImages.size()); + break; + + default: + throw new IllegalArgumentException("不支持的文件类型: " + extension); + } + + // 添加元数据 + content.put("filename", new File(filePath).getName()); + content.put("uploadTime", LocalDateTime.now().toString()); + content.put("fileType", extension); + + return content; + } +} diff --git a/knows-java/src/main/resources/application.yml b/knows-java/src/main/resources/application.yml new file mode 100644 index 0000000..80fade2 --- /dev/null +++ b/knows-java/src/main/resources/application.yml @@ -0,0 +1,32 @@ +server: + port: 8899 + +spring: + servlet: + multipart: + max-file-size: 10MB + max-request-size: 10MB + main: + allow-bean-definition-overriding: true + application: + name: knows + + elasticsearch: + uris: 172.16.100.47:9200 +# username: elastic +# password: 123456 + +qwen: + api-key: sk-********************** + model: qwen-plus + +oll: + uri: 
http://172.16.90.4:11434/api/generate + +embedding: + uri: http://172.16.90.4:6009/v1/embed + api-key: sk-abcdefg1234567 + +re-rank: + uri: http://172.16.90.4:6010/v1/reRank + api-key: sk-abcdefg1234567 \ No newline at end of file diff --git a/knows-java/src/main/resources/native/opencv_java4110.dll b/knows-java/src/main/resources/native/opencv_java4110.dll new file mode 100644 index 0000000..4f11fd6 Binary files /dev/null and b/knows-java/src/main/resources/native/opencv_java4110.dll differ diff --git a/knows-java/src/main/resources/ocr/chi_sim.traineddata b/knows-java/src/main/resources/ocr/chi_sim.traineddata new file mode 100644 index 0000000..da7fa49 Binary files /dev/null and b/knows-java/src/main/resources/ocr/chi_sim.traineddata differ diff --git a/knows-java/src/main/resources/ocr/eng.traineddata b/knows-java/src/main/resources/ocr/eng.traineddata new file mode 100644 index 0000000..176dc32 Binary files /dev/null and b/knows-java/src/main/resources/ocr/eng.traineddata differ diff --git a/knows-java/src/main/resources/ocr/osd.traineddata b/knows-java/src/main/resources/ocr/osd.traineddata new file mode 100644 index 0000000..527457c Binary files /dev/null and b/knows-java/src/main/resources/ocr/osd.traineddata differ diff --git a/knows-java/src/test/java/cn/luckday/ApplicationTests.java b/knows-java/src/test/java/cn/luckday/ApplicationTests.java new file mode 100644 index 0000000..c943c75 --- /dev/null +++ b/knows-java/src/test/java/cn/luckday/ApplicationTests.java @@ -0,0 +1,24 @@ +package cn.luckday; + +import cn.luckday.service.EsDocumentService; +import jakarta.annotation.Resource; +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; + +import java.io.IOException; + +@SpringBootTest +class ApplicationTests { + + @Test + void contextLoads() { + } + + @Resource + private EsDocumentService service; + + @Test + void create() throws IOException { + service.createIndex(); + } +} diff --git 
@app.post("/v1/embed", response_model=EmbeddingResponse)
async def get_embed(request: EmbeddingRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Return an L2-normalised embedding for every text in ``request.input``.

    The bearer token must equal the configured key, otherwise a 401 is
    raised. An empty ``input`` list is rejected with a 400.

    Returns a dict with ``data`` (one ``{"embedding", "index"}`` entry per
    input, in input order) and ``dimension`` (length of the first embedding).
    """
    import secrets  # local import: only this handler needs it

    # Constant-time comparison so response timing does not leak how much of
    # the key matched.
    if not secrets.compare_digest(credentials.credentials.encode("utf-8"), sk_key.encode("utf-8")):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authorization code",
        )

    # Without at least one input, `embeddings[0]` below would raise IndexError.
    if not request.input:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="input must contain at least one text",
        )

    # Compute one embedding per input text
    embeddings = [model.encode(text) for text in request.input]

    # Normalise to unit length; a zero vector is left as is to avoid dividing by zero
    normalised = []
    for embedding in embeddings:
        norm = np.linalg.norm(embedding)
        normalised.append(embedding / norm if norm > 0 else embedding)

    # Convert numpy arrays to plain lists for JSON serialisation
    embeddings = [embedding.tolist() for embedding in normalised]

    response = {
        "data": [
            {
                "embedding": embedding,
                "index": index
            } for index, embedding in enumerate(embeddings)
        ],
        "dimension": len(embeddings[0])
    }

    return response
# Route handler for rerank requests
@app.post("/v1/reRank", response_model=ReRankResponse)
async def get_embeddings(request: ReRankRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Rerank ``request.textList`` against ``request.query``.

    The bearer token must equal the configured key, otherwise a 401 is
    raised. With an empty query or an empty passage list there is nothing to
    rank, so an empty result is returned without calling the model.
    """
    import secrets  # local import: only this handler needs it

    # Constant-time comparison so response timing does not leak how much of
    # the key matched.
    if not secrets.compare_digest(credentials.credentials.encode("utf-8"), sk_key.encode("utf-8")):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authorization code",
        )
    query = request.query
    passages = request.textList

    # NOTE(review): returned here so that all three fields required by
    # ReRankResponse are always present; whether model.rerank() supplies
    # them for empty input has not been verified.
    if not query or not passages:
        return {"rerank_passages": [], "rerank_scores": [], "rerank_ids": []}

    return model.rerank(query, passages)
+
+
+ + +
+
+ + +
+ +
+ + +
+
+ × +

上传文件

+
+ +
+ +

点击或拖拽文件到此处上传

+

支持的格式: PDF, DOC, DOCX

+
+
+ +
+
+
+ + + + diff --git a/konws-web/css/main.css b/konws-web/css/main.css new file mode 100644 index 0000000..32b2591 --- /dev/null +++ b/konws-web/css/main.css @@ -0,0 +1,337 @@ +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; + max-width: 800px; + margin: 0 auto; + padding: 0; + background-color: #f5f5f5; + height: 100vh; + display: flex; + flex-direction: column; +} + +#chatBox { + flex: 1; + background: #ededed; + padding: 20px; + overflow-y: auto; + margin: 50px; + border-radius: 18px; + position: relative; + display: flex; + flex-direction: column; + padding-bottom: 80px; + scrollbar-width: none; + box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1); + /* Firefox */ + -ms-overflow-style: none; + /* IE and Edge */ +} + +#chatBox::-webkit-scrollbar { + display: none; + /* Chrome, Safari, Opera */ +} + +.messages-container { + flex: 1; + overflow-y: auto; + padding: 20px; + scrollbar-width: none; + /* Firefox */ + -ms-overflow-style: none; + /* IE and Edge */ +} + +.messages-container::-webkit-scrollbar { + display: none; + /* Chrome, Safari, Opera */ +} + +.message { + margin: 10px 0; + padding: 10px 15px; + border-radius: 4px; + max-width: 70%; + word-wrap: break-word; + position: relative; + line-height: 1.5; + font-size: 15px; + width: max-content; + display: flex; + align-items: flex-start; + gap: 10px; +} + +.avatar { + width: 40px; + height: 40px; + border-radius: 50%; + flex-shrink: 0; +} + +.message-content { + padding: 10px 15px; + border-radius: 15px; +} + +.user-message { + width: max-content; + margin-left: auto; + flex-direction: row-reverse; +} + +.user-message .message-content { + background: #95ec69; + border-radius: 15px 0 15px 15px; +} + +.bot-message { + background: white; + margin-right: auto; + border-radius: 0 15px 15px 15px; +} + +#inputArea { + position: absolute; + bottom: 20px; + left: 50%; + transform: translateX(-50%); + display: flex; + gap: 10px; + padding: 15px; + background: 
white; + border-radius: 20px; + box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1); + width: calc(100% - 100px); + max-width: 600px; + box-sizing: border-box; +} + +#userInput { + flex-grow: 1; + padding: 8px 12px; + border: 1px solid #ddd; + border: none; + border-radius: 4px; + font-size: 15px; + background: white; + outline: none; +} + +#userInput:focus { + border-color: #07c160; +} + +button { + display: flex; + justify-content: center; + align-items: center; + background: #07c160; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; + transition: background 0.2s; + font-size: 15px; + border-radius: 50%; + width: 40px; + height: 40px; + position: absolute; + right: 18px; +} + +button:hover { + background: #06ae56; +} + +button:disabled { + background: #9fd7b5; + cursor: not-allowed; +} + +/* Markdown 样式优化 */ +.message pre { + background: #f8f9fa; + padding: 12px; + border-radius: 4px; + overflow-x: auto; + margin: 8px 0; + font-size: 14px; +} + +.message code { + font-family: Menlo, Monaco, Consolas, "Courier New", monospace; + background: rgba(0, 0, 0, 0.05); + padding: 2px 4px; + border-radius: 3px; + font-size: 14px; +} + +.message p { + margin: 0; +} + +.message p+p { + margin-top: 8px; +} + +/* 滚动条样式 */ +#chatBox::-webkit-scrollbar { + width: 6px; +} + +#chatBox::-webkit-scrollbar-track { + background: #f1f1f1; +} + +#chatBox::-webkit-scrollbar-thumb { + background: #c1c1c1; + border-radius: 3px; +} + +#chatBox::-webkit-scrollbar-thumb:hover { + background: #a8a8a8; +} + +/* 适配移动端 */ +@media (max-width: 768px) { + body { + max-width: 100%; + height: 100vh; + padding: 0; + } + + .message { + max-width: 85%; + } + + #inputArea { + padding: 10px; + } +} + +.upload-button-container { + position: fixed; + bottom: 20px; + right: 20px; + z-index: 1000; +} + +.upload-btn { + background-color: #000; + color: white; + border: none; + border-radius: 50%; + cursor: pointer; + font-size: 16px; + transition: background-color 0.3s; + bottom: 50px; +} + 
+.upload-btn:hover { + background-color: #1a1a1a; +} + +.upload-dialog { + display: none; + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.5); + z-index: 1001; +} + +.upload-dialog-content { + position: relative; + background-color: #fefefe; + margin: 15% auto; + padding: 20px; + border-radius: 5px; + width: 60%; + max-width: 500px; +} + +.close-btn { + position: absolute; + right: 10px; + top: 5px; + font-size: 24px; + cursor: pointer; + color: #888; +} + +.close-btn:hover { + color: #555; +} + +.upload-area { + border: 2px dashed #ccc; + border-radius: 5px; + padding: 20px; + text-align: center; + margin: 20px 0; + cursor: pointer; + transition: border-color 0.3s; +} + +.upload-area:hover { + border-color: #4caf50; +} + +.upload-placeholder { + color: #666; +} + +.upload-placeholder i { + font-size: 48px; + color: #4caf50; + margin-bottom: 10px; +} + +.supported-formats { + font-size: 12px; + color: #888; + margin-top: 5px; +} + +.upload-progress { + margin: 15px 0; +} + +.progress-bar { + width: 100%; + height: 20px; + background-color: #f0f0f0; + border-radius: 10px; + overflow: hidden; +} + +.progress-fill { + width: 0%; + height: 100%; + background-color: #4caf50; + transition: width 0.3s; +} + +.progress-text { + display: block; + text-align: center; + margin-top: 5px; + color: #666; +} + +.upload-status { + margin-top: 10px; + text-align: center; + color: #666; +} + +.upload-status.success { + color: #4caf50; +} + +.upload-status.error { + color: #f44336; +} \ No newline at end of file diff --git a/konws-web/images/bot-avatar.png b/konws-web/images/bot-avatar.png new file mode 100644 index 0000000..6d6c02b Binary files /dev/null and b/konws-web/images/bot-avatar.png differ diff --git a/konws-web/images/user-avatar.png b/konws-web/images/user-avatar.png new file mode 100644 index 0000000..02b3f81 Binary files /dev/null and b/konws-web/images/user-avatar.png differ diff --git 
a/konws-web/js/marked.min.js b/konws-web/js/marked.min.js new file mode 100644 index 0000000..717c9fc --- /dev/null +++ b/konws-web/js/marked.min.js @@ -0,0 +1,6 @@ +/** + * marked v15.0.7 - a markdown parser + * Copyright (c) 2011-2025, Christopher Jeffrey. (MIT Licensed) + * https://github.com/markedjs/marked + */ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).marked={})}(this,(function(e){"use strict";function t(){return{async:!1,breaks:!1,extensions:null,gfm:!0,hooks:null,pedantic:!1,renderer:null,silent:!1,tokenizer:null,walkTokens:null}}function n(t){e.defaults=t}e.defaults={async:!1,breaks:!1,extensions:null,gfm:!0,hooks:null,pedantic:!1,renderer:null,silent:!1,tokenizer:null,walkTokens:null};const s={exec:()=>null};function r(e,t=""){let n="string"==typeof e?e:e.source;const s={replace:(e,t)=>{let r="string"==typeof t?t:t.source;return r=r.replace(i.caret,"$1"),n=n.replace(e,r),s},getRegex:()=>new RegExp(n,t)};return s}const i={codeRemoveIndent:/^(?: {1,4}| {0,3}\t)/gm,outputLinkReplace:/\\([\[\]])/g,indentCodeCompensation:/^(\s+)(?:```)/,beginningSpace:/^\s+/,endingHash:/#$/,startingSpaceChar:/^ /,endingSpaceChar:/ $/,nonSpaceChar:/[^ ]/,newLineCharGlobal:/\n/g,tabCharGlobal:/\t/g,multipleSpaceGlobal:/\s+/g,blankLine:/^[ \t]*$/,doubleBlankLine:/\n[ \t]*\n[ \t]*$/,blockquoteStart:/^ {0,3}>/,blockquoteSetextReplace:/\n {0,3}((?:=+|-+) *)(?=\n|$)/g,blockquoteSetextReplace2:/^ {0,3}>[ \t]?/gm,listReplaceTabs:/^\t+/,listReplaceNesting:/^ {1,4}(?=( {4})*[^ ])/g,listIsTask:/^\[[ xX]\] /,listReplaceTask:/^\[[ xX]\] +/,anyLine:/\n.*\n/,hrefBrackets:/^<(.*)>$/,tableDelimiter:/[:|]/,tableAlignChars:/^\||\| *$/g,tableRowBlankLine:/\n[ \t]*$/,tableAlignRight:/^ *-+: *$/,tableAlignCenter:/^ *:-+: *$/,tableAlignLeft:/^ *:-+ 
*$/,startATag:/^/i,startPreScriptTag:/^<(pre|code|kbd|script)(\s|>)/i,endPreScriptTag:/^<\/(pre|code|kbd|script)(\s|>)/i,startAngleBracket:/^$/,pedanticHrefTitle:/^([^'"]*[^\s])\s+(['"])(.*)\2/,unicodeAlphaNumeric:/[\p{L}\p{N}]/u,escapeTest:/[&<>"']/,escapeReplace:/[&<>"']/g,escapeTestNoEncode:/[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/,escapeReplaceNoEncode:/[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g,unescapeTest:/&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/gi,caret:/(^|[^\[])\^/g,percentDecode:/%25/g,findPipe:/\|/g,splitPipe:/ \|/,slashPipe:/\\\|/g,carriageReturn:/\r\n|\r/g,spaceLine:/^ +$/gm,notSpaceStart:/^\S*/,endingNewline:/\n$/,listItemRegex:e=>new RegExp(`^( {0,3}${e})((?:[\t ][^\\n]*)?(?:\\n|$))`),nextBulletRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),hrRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),fencesBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}(?:\`\`\`|~~~)`),headingBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}#`),htmlBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}<(?:[a-z].*>|!--)`,"i")},l=/^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,o=/(?:[*+-]|\d{1,9}[.)])/,a=/^(?!bull |blockCode|fences|blockquote|heading|html|table)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html|table))+?)\n {0,3}(=+|-+) *(?:\n+|$)/,c=r(a).replace(/bull/g,o).replace(/blockCode/g,/(?: {4}| {0,3}\t)/).replace(/fences/g,/ {0,3}(?:`{3,}|~{3,})/).replace(/blockquote/g,/ {0,3}>/).replace(/heading/g,/ {0,3}#{1,6}/).replace(/html/g,/ {0,3}<[^\n>]+>\n/).replace(/\|table/g,"").getRegex(),h=r(a).replace(/bull/g,o).replace(/blockCode/g,/(?: {4}| {0,3}\t)/).replace(/fences/g,/ {0,3}(?:`{3,}|~{3,})/).replace(/blockquote/g,/ {0,3}>/).replace(/heading/g,/ {0,3}#{1,6}/).replace(/html/g,/ {0,3}<[^\n>]+>\n/).replace(/table/g,/ {0,3}\|?(?:[:\- ]*\|)+[\:\- 
]*\n/).getRegex(),p=/^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,u=/(?!\s*\])(?:\\.|[^\[\]\\])+/,g=r(/^ {0,3}\[(label)\]: *(?:\n[ \t]*)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n[ \t]*)?| *\n[ \t]*)(title))? *(?:\n+|$)/).replace("label",u).replace("title",/(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/).getRegex(),k=r(/^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/).replace(/bull/g,o).getRegex(),d="address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul",f=/|$))/,x=r("^ {0,3}(?:<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)|comment[^\\n]*(\\n+|$)|<\\?[\\s\\S]*?(?:\\?>\\n*|$)|\\n*|$)|\\n*|$)|)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? 
*/?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$))","i").replace("comment",f).replace("tag",d).replace("attribute",/ +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/).getRegex(),b=r(p).replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("|lheading","").replace("|table","").replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",d).getRegex(),w={blockquote:r(/^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/).replace("paragraph",b).getRegex(),code:/^((?: {4}| {0,3}\t)[^\n]+(?:\n(?:[ \t]*(?:\n|$))*)?)+/,def:g,fences:/^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/,heading:/^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,hr:l,html:x,lheading:c,list:k,newline:/^(?:[ \t]*(?:\n|$))+/,paragraph:b,table:s,text:/^[^\n]+/},m=r("^ *([^\\n ].*)\\n {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)").replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("blockquote"," {0,3}>").replace("code","(?: {4}| {0,3}\t)[^\\n]").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",d).getRegex(),y={...w,lheading:h,table:m,paragraph:r(p).replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("|lheading","").replace("table",m).replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",d).getRegex()},$={...w,html:r("^ *(?:comment *(?:\\n|\\s*$)|<(tag)[\\s\\S]+? 
*(?:\\n{2,}|\\s*$)|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))").replace("comment",f).replace(/tag/g,"(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b").getRegex(),def:/^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,heading:/^(#{1,6})(.*)(?:\n+|$)/,fences:s,lheading:/^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/,paragraph:r(p).replace("hr",l).replace("heading"," *#{1,6} *[^\n]").replace("lheading",c).replace("|table","").replace("blockquote"," {0,3}>").replace("|fences","").replace("|list","").replace("|html","").replace("|tag","").getRegex()},R=/^( {2,}|\\)\n(?!\s*$)/,S=/[\p{P}\p{S}]/u,T=/[\s\p{P}\p{S}]/u,z=/[^\s\p{P}\p{S}]/u,A=r(/^((?![*_])punctSpace)/,"u").replace(/punctSpace/g,T).getRegex(),_=/(?!~)[\p{P}\p{S}]/u,P=/^(?:\*+(?:((?!\*)punct)|[^\s*]))|^_+(?:((?!_)punct)|([^\s_]))/,I=r(P,"u").replace(/punct/g,S).getRegex(),L=r(P,"u").replace(/punct/g,_).getRegex(),B="^[^_*]*?__[^_*]*?\\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\\*)punct(\\*+)(?=[\\s]|$)|notPunctSpace(\\*+)(?!\\*)(?=punctSpace|$)|(?!\\*)punctSpace(\\*+)(?=notPunctSpace)|[\\s](\\*+)(?!\\*)(?=punct)|(?!\\*)punct(\\*+)(?!\\*)(?=punct)|notPunctSpace(\\*+)(?=notPunctSpace)",C=r(B,"gu").replace(/notPunctSpace/g,z).replace(/punctSpace/g,T).replace(/punct/g,S).getRegex(),q=r(B,"gu").replace(/notPunctSpace/g,/(?:[^\s\p{P}\p{S}]|~)/u).replace(/punctSpace/g,/(?!~)[\s\p{P}\p{S}]/u).replace(/punct/g,_).getRegex(),E=r("^[^_*]*?\\*\\*[^_*]*?_[^_*]*?(?=\\*\\*)|[^_]+(?=[^_])|(?!_)punct(_+)(?=[\\s]|$)|notPunctSpace(_+)(?!_)(?=punctSpace|$)|(?!_)punctSpace(_+)(?=notPunctSpace)|[\\s](_+)(?!_)(?=punct)|(?!_)punct(_+)(?!_)(?=punct)","gu").replace(/notPunctSpace/g,z).replace(/punctSpace/g,T).replace(/punct/g,S).getRegex(),Z=r(/\\(punct)/,"gu").replace(/punct/g,S).getRegex(),v=r(/^<(scheme:[^\s\x00-\x1f<>]*|email)>/).replace("scheme",/[a-zA-Z][a-zA-Z0-9+.-]{1,31}/).replace("email",/[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-z
A-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/).getRegex(),D=r(f).replace("(?:--\x3e|$)","--\x3e").getRegex(),M=r("^comment|^|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>|^<\\?[\\s\\S]*?\\?>|^|^").replace("comment",D).replace("attribute",/\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/).getRegex(),O=/(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/,Q=r(/^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/).replace("label",O).replace("href",/<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/).replace("title",/"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/).getRegex(),j=r(/^!?\[(label)\]\[(ref)\]/).replace("label",O).replace("ref",u).getRegex(),N=r(/^!?\[(ref)\](?:\[\])?/).replace("ref",u).getRegex(),G={_backpedal:s,anyPunctuation:Z,autolink:v,blockSkip:/\[[^[\]]*?\]\((?:\\.|[^\\\(\)]|\((?:\\.|[^\\\(\)])*\))*\)|`[^`]*?`|<[^<>]*?>/g,br:R,code:/^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,del:s,emStrongLDelim:I,emStrongRDelimAst:C,emStrongRDelimUnd:E,escape:/^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,link:Q,nolink:N,punctuation:A,reflink:j,reflinkSearch:r("reflink|nolink(?!\\()","g").replace("reflink",j).replace("nolink",N).getRegex(),tag:M,text:/^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\":">",'"':""","'":"'"},V=e=>K[e];function W(e,t){if(t){if(i.escapeTest.test(e))return e.replace(i.escapeReplace,V)}else if(i.escapeTestNoEncode.test(e))return e.replace(i.escapeReplaceNoEncode,V);return e}function Y(e){try{e=encodeURI(e).replace(i.percentDecode,"%")}catch{return null}return e}function ee(e,t){const n=e.replace(i.findPipe,((e,t,n)=>{let s=!1,r=t;for(;--r>=0&&"\\"===n[r];)s=!s;return s?"|":" |"})).split(i.splitPipe);let s=0;if(n[0].trim()||n.shift(),n.length>0&&!n.at(-1)?.trim()&&n.pop(),t)if(n.length>t)n.splice(t);else for(;n.length0)return{type:"space",raw:t[0]}}code(e){const t=this.rules.block.code.exec(e);if(t){const 
e=t[0].replace(this.rules.other.codeRemoveIndent,"");return{type:"code",raw:t[0],codeBlockStyle:"indented",text:this.options.pedantic?e:te(e,"\n")}}}fences(e){const t=this.rules.block.fences.exec(e);if(t){const e=t[0],n=function(e,t,n){const s=e.match(n.other.indentCodeCompensation);if(null===s)return t;const r=s[1];return t.split("\n").map((e=>{const t=e.match(n.other.beginningSpace);if(null===t)return e;const[s]=t;return s.length>=r.length?e.slice(r.length):e})).join("\n")}(e,t[3]||"",this.rules);return{type:"code",raw:e,lang:t[2]?t[2].trim().replace(this.rules.inline.anyPunctuation,"$1"):t[2],text:n}}}heading(e){const t=this.rules.block.heading.exec(e);if(t){let e=t[2].trim();if(this.rules.other.endingHash.test(e)){const t=te(e,"#");this.options.pedantic?e=t.trim():t&&!this.rules.other.endingSpaceChar.test(t)||(e=t.trim())}return{type:"heading",raw:t[0],depth:t[1].length,text:e,tokens:this.lexer.inline(e)}}}hr(e){const t=this.rules.block.hr.exec(e);if(t)return{type:"hr",raw:te(t[0],"\n")}}blockquote(e){const t=this.rules.block.blockquote.exec(e);if(t){let e=te(t[0],"\n").split("\n"),n="",s="";const r=[];for(;e.length>0;){let t=!1;const i=[];let l;for(l=0;l1,r={type:"list",raw:"",ordered:s,start:s?+n.slice(0,-1):"",loose:!1,items:[]};n=s?`\\d{1,9}\\${n.slice(-1)}`:`\\${n}`,this.options.pedantic&&(n=s?n:"[*+-]");const i=this.rules.other.listItemRegex(n);let l=!1;for(;e;){let n=!1,s="",o="";if(!(t=i.exec(e)))break;if(this.rules.block.hr.test(e))break;s=t[0],e=e.substring(s.length);let a=t[2].split("\n",1)[0].replace(this.rules.other.listReplaceTabs,(e=>" ".repeat(3*e.length))),c=e.split("\n",1)[0],h=!a.trim(),p=0;if(this.options.pedantic?(p=2,o=a.trimStart()):h?p=t[1].length+1:(p=t[2].search(this.rules.other.nonSpaceChar),p=p>4?1:p,o=a.slice(p),p+=t[1].length),h&&this.rules.other.blankLine.test(c)&&(s+=c+"\n",e=e.substring(c.length+1),n=!0),!n){const 
t=this.rules.other.nextBulletRegex(p),n=this.rules.other.hrRegex(p),r=this.rules.other.fencesBeginRegex(p),i=this.rules.other.headingBeginRegex(p),l=this.rules.other.htmlBeginRegex(p);for(;e;){const u=e.split("\n",1)[0];let g;if(c=u,this.options.pedantic?(c=c.replace(this.rules.other.listReplaceNesting," "),g=c):g=c.replace(this.rules.other.tabCharGlobal," "),r.test(c))break;if(i.test(c))break;if(l.test(c))break;if(t.test(c))break;if(n.test(c))break;if(g.search(this.rules.other.nonSpaceChar)>=p||!c.trim())o+="\n"+g.slice(p);else{if(h)break;if(a.replace(this.rules.other.tabCharGlobal," ").search(this.rules.other.nonSpaceChar)>=4)break;if(r.test(a))break;if(i.test(a))break;if(n.test(a))break;o+="\n"+c}h||c.trim()||(h=!0),s+=u+"\n",e=e.substring(u.length+1),a=g.slice(p)}}r.loose||(l?r.loose=!0:this.rules.other.doubleBlankLine.test(s)&&(l=!0));let u,g=null;this.options.gfm&&(g=this.rules.other.listIsTask.exec(o),g&&(u="[ ] "!==g[0],o=o.replace(this.rules.other.listReplaceTask,""))),r.items.push({type:"list_item",raw:s,task:!!g,checked:u,loose:!1,text:o,tokens:[]}),r.raw+=s}const o=r.items.at(-1);if(!o)return;o.raw=o.raw.trimEnd(),o.text=o.text.trimEnd(),r.raw=r.raw.trimEnd();for(let e=0;e"space"===e.type)),n=t.length>0&&t.some((e=>this.rules.other.anyLine.test(e.raw)));r.loose=n}if(r.loose)for(let e=0;e({text:e,tokens:this.lexer.inline(e),header:!1,align:i.align[t]}))));return i}}lheading(e){const t=this.rules.block.lheading.exec(e);if(t)return{type:"heading",raw:t[0],depth:"="===t[2].charAt(0)?1:2,text:t[1],tokens:this.lexer.inline(t[1])}}paragraph(e){const t=this.rules.block.paragraph.exec(e);if(t){const e="\n"===t[1].charAt(t[1].length-1)?t[1].slice(0,-1):t[1];return{type:"paragraph",raw:t[0],text:e,tokens:this.lexer.inline(e)}}}text(e){const t=this.rules.block.text.exec(e);if(t)return{type:"text",raw:t[0],text:t[0],tokens:this.lexer.inline(t[0])}}escape(e){const t=this.rules.inline.escape.exec(e);if(t)return{type:"escape",raw:t[0],text:t[1]}}tag(e){const 
t=this.rules.inline.tag.exec(e);if(t)return!this.lexer.state.inLink&&this.rules.other.startATag.test(t[0])?this.lexer.state.inLink=!0:this.lexer.state.inLink&&this.rules.other.endATag.test(t[0])&&(this.lexer.state.inLink=!1),!this.lexer.state.inRawBlock&&this.rules.other.startPreScriptTag.test(t[0])?this.lexer.state.inRawBlock=!0:this.lexer.state.inRawBlock&&this.rules.other.endPreScriptTag.test(t[0])&&(this.lexer.state.inRawBlock=!1),{type:"html",raw:t[0],inLink:this.lexer.state.inLink,inRawBlock:this.lexer.state.inRawBlock,block:!1,text:t[0]}}link(e){const t=this.rules.inline.link.exec(e);if(t){const e=t[2].trim();if(!this.options.pedantic&&this.rules.other.startAngleBracket.test(e)){if(!this.rules.other.endAngleBracket.test(e))return;const t=te(e.slice(0,-1),"\\");if((e.length-t.length)%2==0)return}else{const e=function(e,t){if(-1===e.indexOf(t[1]))return-1;let n=0;for(let s=0;s-1){const n=(0===t[0].indexOf("!")?5:4)+t[1].length+e;t[2]=t[2].substring(0,e),t[0]=t[0].substring(0,n).trim(),t[3]=""}}let n=t[2],s="";if(this.options.pedantic){const e=this.rules.other.pedanticHrefTitle.exec(n);e&&(n=e[1],s=e[3])}else s=t[3]?t[3].slice(1,-1):"";return n=n.trim(),this.rules.other.startAngleBracket.test(n)&&(n=this.options.pedantic&&!this.rules.other.endAngleBracket.test(e)?n.slice(1):n.slice(1,-1)),ne(t,{href:n?n.replace(this.rules.inline.anyPunctuation,"$1"):n,title:s?s.replace(this.rules.inline.anyPunctuation,"$1"):s},t[0],this.lexer,this.rules)}}reflink(e,t){let n;if((n=this.rules.inline.reflink.exec(e))||(n=this.rules.inline.nolink.exec(e))){const e=t[(n[2]||n[1]).replace(this.rules.other.multipleSpaceGlobal," ").toLowerCase()];if(!e){const e=n[0].charAt(0);return{type:"text",raw:e,text:e}}return ne(n,e,n[0],this.lexer,this.rules)}}emStrong(e,t,n=""){let s=this.rules.inline.emStrongLDelim.exec(e);if(!s)return;if(s[3]&&n.match(this.rules.other.unicodeAlphaNumeric))return;if(!(s[1]||s[2]||"")||!n||this.rules.inline.punctuation.exec(n)){const n=[...s[0]].length-1;let 
r,i,l=n,o=0;const a="*"===s[0][0]?this.rules.inline.emStrongRDelimAst:this.rules.inline.emStrongRDelimUnd;for(a.lastIndex=0,t=t.slice(-1*e.length+n);null!=(s=a.exec(t));){if(r=s[1]||s[2]||s[3]||s[4]||s[5]||s[6],!r)continue;if(i=[...r].length,s[3]||s[4]){l+=i;continue}if((s[5]||s[6])&&n%3&&!((n+i)%3)){o+=i;continue}if(l-=i,l>0)continue;i=Math.min(i,i+l+o);const t=[...s[0]][0].length,a=e.slice(0,n+s.index+t+i);if(Math.min(n,i)%2){const e=a.slice(1,-1);return{type:"em",raw:a,text:e,tokens:this.lexer.inlineTokens(e)}}const c=a.slice(2,-2);return{type:"strong",raw:a,text:c,tokens:this.lexer.inlineTokens(c)}}}}codespan(e){const t=this.rules.inline.code.exec(e);if(t){let e=t[2].replace(this.rules.other.newLineCharGlobal," ");const n=this.rules.other.nonSpaceChar.test(e),s=this.rules.other.startingSpaceChar.test(e)&&this.rules.other.endingSpaceChar.test(e);return n&&s&&(e=e.substring(1,e.length-1)),{type:"codespan",raw:t[0],text:e}}}br(e){const t=this.rules.inline.br.exec(e);if(t)return{type:"br",raw:t[0]}}del(e){const t=this.rules.inline.del.exec(e);if(t)return{type:"del",raw:t[0],text:t[2],tokens:this.lexer.inlineTokens(t[2])}}autolink(e){const t=this.rules.inline.autolink.exec(e);if(t){let e,n;return"@"===t[2]?(e=t[1],n="mailto:"+e):(e=t[1],n=e),{type:"link",raw:t[0],text:e,href:n,tokens:[{type:"text",raw:e,text:e}]}}}url(e){let t;if(t=this.rules.inline.url.exec(e)){let e,n;if("@"===t[2])e=t[0],n="mailto:"+e;else{let s;do{s=t[0],t[0]=this.rules.inline._backpedal.exec(t[0])?.[0]??""}while(s!==t[0]);e=t[0],n="www."===t[1]?"http://"+t[0]:t[0]}return{type:"link",raw:t[0],text:e,href:n,tokens:[{type:"text",raw:e,text:e}]}}}inlineText(e){const t=this.rules.inline.text.exec(e);if(t){const e=this.lexer.state.inRawBlock;return{type:"text",raw:t[0],text:t[0],escaped:e}}}}class re{tokens;options;state;tokenizer;inlineQueue;constructor(t){this.tokens=[],this.tokens.links=Object.create(null),this.options=t||e.defaults,this.options.tokenizer=this.options.tokenizer||new 
se,this.tokenizer=this.options.tokenizer,this.tokenizer.options=this.options,this.tokenizer.lexer=this,this.inlineQueue=[],this.state={inLink:!1,inRawBlock:!1,top:!0};const n={other:i,block:U.normal,inline:J.normal};this.options.pedantic?(n.block=U.pedantic,n.inline=J.pedantic):this.options.gfm&&(n.block=U.gfm,this.options.breaks?n.inline=J.breaks:n.inline=J.gfm),this.tokenizer.rules=n}static get rules(){return{block:U,inline:J}}static lex(e,t){return new re(t).lex(e)}static lexInline(e,t){return new re(t).inlineTokens(e)}lex(e){e=e.replace(i.carriageReturn,"\n"),this.blockTokens(e,this.tokens);for(let e=0;e!!(s=n.call({lexer:this},e,t))&&(e=e.substring(s.raw.length),t.push(s),!0))))continue;if(s=this.tokenizer.space(e)){e=e.substring(s.raw.length);const n=t.at(-1);1===s.raw.length&&void 0!==n?n.raw+="\n":t.push(s);continue}if(s=this.tokenizer.code(e)){e=e.substring(s.raw.length);const n=t.at(-1);"paragraph"===n?.type||"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.text,this.inlineQueue.at(-1).src=n.text):t.push(s);continue}if(s=this.tokenizer.fences(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.heading(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.hr(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.blockquote(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.list(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.html(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.def(e)){e=e.substring(s.raw.length);const n=t.at(-1);"paragraph"===n?.type||"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.raw,this.inlineQueue.at(-1).src=n.text):this.tokens.links[s.tag]||(this.tokens.links[s.tag]={href:s.href,title:s.title});continue}if(s=this.tokenizer.table(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.lheading(e)){e=e.substring(s.raw.length),t.push(s);continue}let r=e;if(this.options.extensions?.startBlock){let 
t=1/0;const n=e.slice(1);let s;this.options.extensions.startBlock.forEach((e=>{s=e.call({lexer:this},n),"number"==typeof s&&s>=0&&(t=Math.min(t,s))})),t<1/0&&t>=0&&(r=e.substring(0,t+1))}if(this.state.top&&(s=this.tokenizer.paragraph(r))){const i=t.at(-1);n&&"paragraph"===i?.type?(i.raw+="\n"+s.raw,i.text+="\n"+s.text,this.inlineQueue.pop(),this.inlineQueue.at(-1).src=i.text):t.push(s),n=r.length!==e.length,e=e.substring(s.raw.length)}else if(s=this.tokenizer.text(e)){e=e.substring(s.raw.length);const n=t.at(-1);"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.text,this.inlineQueue.pop(),this.inlineQueue.at(-1).src=n.text):t.push(s)}else if(e){const t="Infinite loop on byte: "+e.charCodeAt(0);if(this.options.silent){console.error(t);break}throw new Error(t)}}return this.state.top=!0,t}inline(e,t=[]){return this.inlineQueue.push({src:e,tokens:t}),t}inlineTokens(e,t=[]){let n=e,s=null;if(this.tokens.links){const e=Object.keys(this.tokens.links);if(e.length>0)for(;null!=(s=this.tokenizer.rules.inline.reflinkSearch.exec(n));)e.includes(s[0].slice(s[0].lastIndexOf("[")+1,-1))&&(n=n.slice(0,s.index)+"["+"a".repeat(s[0].length-2)+"]"+n.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex))}for(;null!=(s=this.tokenizer.rules.inline.blockSkip.exec(n));)n=n.slice(0,s.index)+"["+"a".repeat(s[0].length-2)+"]"+n.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);for(;null!=(s=this.tokenizer.rules.inline.anyPunctuation.exec(n));)n=n.slice(0,s.index)+"++"+n.slice(this.tokenizer.rules.inline.anyPunctuation.lastIndex);let r=!1,i="";for(;e;){let 
s;if(r||(i=""),r=!1,this.options.extensions?.inline?.some((n=>!!(s=n.call({lexer:this},e,t))&&(e=e.substring(s.raw.length),t.push(s),!0))))continue;if(s=this.tokenizer.escape(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.tag(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.link(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.reflink(e,this.tokens.links)){e=e.substring(s.raw.length);const n=t.at(-1);"text"===s.type&&"text"===n?.type?(n.raw+=s.raw,n.text+=s.text):t.push(s);continue}if(s=this.tokenizer.emStrong(e,n,i)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.codespan(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.br(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.del(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.autolink(e)){e=e.substring(s.raw.length),t.push(s);continue}if(!this.state.inLink&&(s=this.tokenizer.url(e))){e=e.substring(s.raw.length),t.push(s);continue}let l=e;if(this.options.extensions?.startInline){let t=1/0;const n=e.slice(1);let s;this.options.extensions.startInline.forEach((e=>{s=e.call({lexer:this},n),"number"==typeof s&&s>=0&&(t=Math.min(t,s))})),t<1/0&&t>=0&&(l=e.substring(0,t+1))}if(s=this.tokenizer.inlineText(l)){e=e.substring(s.raw.length),"_"!==s.raw.slice(-1)&&(i=s.raw.slice(-1)),r=!0;const n=t.at(-1);"text"===n?.type?(n.raw+=s.raw,n.text+=s.text):t.push(s)}else if(e){const t="Infinite loop on byte: "+e.charCodeAt(0);if(this.options.silent){console.error(t);break}throw new Error(t)}}return t}}class ie{options;parser;constructor(t){this.options=t||e.defaults}space(e){return""}code({text:e,lang:t,escaped:n}){const s=(t||"").match(i.notSpaceStart)?.[0],r=e.replace(i.endingNewline,"")+"\n";return s?'
'+(n?r:W(r,!0))+"
\n":"
"+(n?r:W(r,!0))+"
\n"}blockquote({tokens:e}){return`
\n${this.parser.parse(e)}
\n`}html({text:e}){return e}heading({tokens:e,depth:t}){return`${this.parser.parseInline(e)}\n`}hr(e){return"
\n"}list(e){const t=e.ordered,n=e.start;let s="";for(let t=0;t\n"+s+"\n"}listitem(e){let t="";if(e.task){const n=this.checkbox({checked:!!e.checked});e.loose?"paragraph"===e.tokens[0]?.type?(e.tokens[0].text=n+" "+e.tokens[0].text,e.tokens[0].tokens&&e.tokens[0].tokens.length>0&&"text"===e.tokens[0].tokens[0].type&&(e.tokens[0].tokens[0].text=n+" "+W(e.tokens[0].tokens[0].text),e.tokens[0].tokens[0].escaped=!0)):e.tokens.unshift({type:"text",raw:n+" ",text:n+" ",escaped:!0}):t+=n+" "}return t+=this.parser.parse(e.tokens,!!e.loose),`
  • ${t}
  • \n`}checkbox({checked:e}){return"'}paragraph({tokens:e}){return`

    ${this.parser.parseInline(e)}

    \n`}table(e){let t="",n="";for(let t=0;t${s}`),"
    \n\n"+t+"\n"+s+"
    \n"}tablerow({text:e}){return`\n${e}\n`}tablecell(e){const t=this.parser.parseInline(e.tokens),n=e.header?"th":"td";return(e.align?`<${n} align="${e.align}">`:`<${n}>`)+t+`\n`}strong({tokens:e}){return`${this.parser.parseInline(e)}`}em({tokens:e}){return`${this.parser.parseInline(e)}`}codespan({text:e}){return`${W(e,!0)}`}br(e){return"
    "}del({tokens:e}){return`${this.parser.parseInline(e)}`}link({href:e,title:t,tokens:n}){const s=this.parser.parseInline(n),r=Y(e);if(null===r)return s;let i='
    ",i}image({href:e,title:t,text:n}){const s=Y(e);if(null===s)return W(n);let r=`${n}{const r=e[s].flat(1/0);n=n.concat(this.walkTokens(r,t))})):e.tokens&&(n=n.concat(this.walkTokens(e.tokens,t)))}}return n}use(...e){const t=this.defaults.extensions||{renderers:{},childTokens:{}};return e.forEach((e=>{const n={...e};if(n.async=this.defaults.async||n.async||!1,e.extensions&&(e.extensions.forEach((e=>{if(!e.name)throw new Error("extension name required");if("renderer"in e){const n=t.renderers[e.name];t.renderers[e.name]=n?function(...t){let s=e.renderer.apply(this,t);return!1===s&&(s=n.apply(this,t)),s}:e.renderer}if("tokenizer"in e){if(!e.level||"block"!==e.level&&"inline"!==e.level)throw new Error("extension level must be 'block' or 'inline'");const n=t[e.level];n?n.unshift(e.tokenizer):t[e.level]=[e.tokenizer],e.start&&("block"===e.level?t.startBlock?t.startBlock.push(e.start):t.startBlock=[e.start]:"inline"===e.level&&(t.startInline?t.startInline.push(e.start):t.startInline=[e.start]))}"childTokens"in e&&e.childTokens&&(t.childTokens[e.name]=e.childTokens)})),n.extensions=t),e.renderer){const t=this.defaults.renderer||new ie(this.defaults);for(const n in e.renderer){if(!(n in t))throw new Error(`renderer '${n}' does not exist`);if(["options","parser"].includes(n))continue;const s=n,r=e.renderer[s],i=t[s];t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n||""}}n.renderer=t}if(e.tokenizer){const t=this.defaults.tokenizer||new se(this.defaults);for(const n in e.tokenizer){if(!(n in t))throw new Error(`tokenizer '${n}' does not exist`);if(["options","rules","lexer"].includes(n))continue;const s=n,r=e.tokenizer[s],i=t[s];t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n}}n.tokenizer=t}if(e.hooks){const t=this.defaults.hooks||new ae;for(const n in e.hooks){if(!(n in t))throw new Error(`hook '${n}' does not exist`);if(["options","block"].includes(n))continue;const 
s=n,r=e.hooks[s],i=t[s];ae.passThroughHooks.has(n)?t[s]=e=>{if(this.defaults.async)return Promise.resolve(r.call(t,e)).then((e=>i.call(t,e)));const n=r.call(t,e);return i.call(t,n)}:t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n}}n.hooks=t}if(e.walkTokens){const t=this.defaults.walkTokens,s=e.walkTokens;n.walkTokens=function(e){let n=[];return n.push(s.call(this,e)),t&&(n=n.concat(t.call(this,e))),n}}this.defaults={...this.defaults,...n}})),this}setOptions(e){return this.defaults={...this.defaults,...e},this}lexer(e,t){return re.lex(e,t??this.defaults)}parser(e,t){return oe.parse(e,t??this.defaults)}parseMarkdown(e){return(t,n)=>{const s={...n},r={...this.defaults,...s},i=this.onError(!!r.silent,!!r.async);if(!0===this.defaults.async&&!1===s.async)return i(new Error("marked(): The async option was set to true by an extension. Remove async: false from the parse options object to return a Promise."));if(null==t)return i(new Error("marked(): input parameter is undefined or null"));if("string"!=typeof t)return i(new Error("marked(): input parameter is of type "+Object.prototype.toString.call(t)+", string expected"));r.hooks&&(r.hooks.options=r,r.hooks.block=e);const l=r.hooks?r.hooks.provideLexer():e?re.lex:re.lexInline,o=r.hooks?r.hooks.provideParser():e?oe.parse:oe.parseInline;if(r.async)return Promise.resolve(r.hooks?r.hooks.preprocess(t):t).then((e=>l(e,r))).then((e=>r.hooks?r.hooks.processAllTokens(e):e)).then((e=>r.walkTokens?Promise.all(this.walkTokens(e,r.walkTokens)).then((()=>e)):e)).then((e=>o(e,r))).then((e=>r.hooks?r.hooks.postprocess(e):e)).catch(i);try{r.hooks&&(t=r.hooks.preprocess(t));let e=l(t,r);r.hooks&&(e=r.hooks.processAllTokens(e)),r.walkTokens&&this.walkTokens(e,r.walkTokens);let n=o(e,r);return r.hooks&&(n=r.hooks.postprocess(n)),n}catch(e){return i(e)}}}onError(e,t){return n=>{if(n.message+="\nPlease report this to https://github.com/markedjs/marked.",e){const e="

    An error occurred:

    "+W(n.message+"",!0)+"
    ";return t?Promise.resolve(e):e}if(t)return Promise.reject(n);throw n}}}const he=new ce;function pe(e,t){return he.parse(e,t)}pe.options=pe.setOptions=function(e){return he.setOptions(e),pe.defaults=he.defaults,n(pe.defaults),pe},pe.getDefaults=t,pe.defaults=e.defaults,pe.use=function(...e){return he.use(...e),pe.defaults=he.defaults,n(pe.defaults),pe},pe.walkTokens=function(e,t){return he.walkTokens(e,t)},pe.parseInline=he.parseInline,pe.Parser=oe,pe.parser=oe.parse,pe.Renderer=ie,pe.TextRenderer=le,pe.Lexer=re,pe.lexer=re.lex,pe.Tokenizer=se,pe.Hooks=ae,pe.parse=pe;const ue=pe.options,ge=pe.setOptions,ke=pe.use,de=pe.walkTokens,fe=pe.parseInline,xe=pe,be=oe.parse,we=re.lex;e.Hooks=ae,e.Lexer=re,e.Marked=ce,e.Parser=oe,e.Renderer=ie,e.TextRenderer=le,e.Tokenizer=se,e.getDefaults=t,e.lexer=we,e.marked=pe,e.options=ue,e.parse=xe,e.parseInline=fe,e.parser=be,e.setOptions=ge,e.use=ke,e.walkTokens=de})); \ No newline at end of file