init

2025-03-05 14:14:54 +08:00 · 2025-03-05 14:14:54 +08:00 · 98d7406a99
commit 98d7406a99
36 changed files with 2676 additions and 0 deletions
--- a/knows-java/pom.xml
+++ b/knows-java/pom.xml
@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.springframework.boot</groupId>
+        <artifactId>spring-boot-starter-parent</artifactId>
+        <version>3.3.2</version>
+        <relativePath/> <!-- lookup parent from repository -->
+    </parent>
+    <groupId>com.zhych</groupId>
+    <artifactId>knows</artifactId>
+    <version>0.0.1-SNAPSHOT</version>
+    <name>embeddings</name>
+    <description>embeddings</description>
+
+    <properties>
+        <java.version>17</java.version>
+    </properties>
+    <dependencies>
+        <!-- Artifacts managed by spring-boot-starter-parent declare no version here, so they
+             stay on the release line the parent aligns with the rest of the stack. -->
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-web</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <optional>true</optional>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-test</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba</groupId>
+            <artifactId>fastjson</artifactId>
+            <version>2.0.15</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>cn.hutool</groupId>
+            <artifactId>hutool-all</artifactId>
+            <version>5.8.25</version>
+        </dependency>
+        <!-- NOTE(review): this is an alpha pre-release; confirm a stable okhttp release
+             cannot be used instead. -->
+        <dependency>
+            <groupId>com.squareup.okhttp3</groupId>
+            <artifactId>okhttp</artifactId>
+            <version>5.0.0-alpha.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.5.13</version>
+        </dependency>
+        <!-- NOTE(review): the 7.x high-level REST client sits next to the 8.x Java client
+             below; confirm both are really needed. -->
+        <dependency>
+            <groupId>org.elasticsearch.client</groupId>
+            <artifactId>elasticsearch-rest-high-level-client</artifactId>
+            <version>7.17.23</version>
+        </dependency>
+        <!-- Version comes from the Boot parent so it matches spring-boot-starter-data-elasticsearch. -->
+        <dependency>
+            <groupId>co.elastic.clients</groupId>
+            <artifactId>elasticsearch-java</artifactId>
+        </dependency>
+        <!-- Version comes from the Boot parent; pinning databind alone would mix it with
+             differently-versioned Jackson core and module artifacts. -->
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba</groupId>
+            <artifactId>dashscope-sdk-java</artifactId>
+            <version>2.8.3</version>
+        </dependency>
+
+        <!-- PDF text and image extraction -->
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>2.0.24</version>
+        </dependency>
+
+        <!-- Tesseract OCR bindings -->
+        <dependency>
+            <groupId>net.sourceforge.tess4j</groupId>
+            <artifactId>tess4j</artifactId>
+            <version>5.7.0</version>
+        </dependency>
+
+        <!-- OpenCV bindings used for image pre-processing before OCR -->
+        <dependency>
+            <groupId>org.bytedeco</groupId>
+            <artifactId>opencv-platform</artifactId>
+            <version>4.7.0-1.5.9</version>
+        </dependency>
+
+        <!-- Apache POI for Word documents; keep the three artifacts on the same version -->
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi</artifactId>
+            <version>5.2.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>5.2.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-scratchpad</artifactId>
+            <version>5.2.3</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <!-- NOTE(review): declaring resources here replaces the resource configuration
+             inherited from the parent POM; confirm that is intended. -->
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>org.springframework.boot</groupId>
+                <artifactId>spring-boot-maven-plugin</artifactId>
+                <configuration>
+                    <!-- Lombok is only needed at compile time, so keep it out of the packaged jar -->
+                    <excludes>
+                        <exclude>
+                            <groupId>org.projectlombok</groupId>
+                            <artifactId>lombok</artifactId>
+                        </exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+            <!-- No version: the plugin version managed by the Boot parent is used instead of
+                 a milestone build. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <!-- Test JVM: 2 GB heap and the directory holding the OpenCV native library -->
+                    <argLine>
+                        -Xmx2048m
+                        -Djava.library.path=${project.basedir}/lib/opencv
+                    </argLine>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
--- a/knows-java/src/main/java/cn/luckday/Application.java
+++ b/knows-java/src/main/java/cn/luckday/Application.java
@ -0,0 +1,15 @@
+package cn.luckday;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.context.annotation.ComponentScan;
+
+/**
+ * Spring Boot entry point.
+ *
+ * <p>{@code @SpringBootApplication} already component-scans this class's package
+ * ({@code cn.luckday}) and everything below it, with Boot's default exclude filters.
+ * No explicit {@code @ComponentScan} is declared because it would replace that default scan.
+ */
+@SpringBootApplication
+public class Application {
+
+    /**
+     * Starts the Spring application context.
+     *
+     * @param args command-line arguments forwarded to Spring Boot
+     */
+    public static void main(String[] args) {
+        SpringApplication.run(Application.class, args);
+    }
+
+}
--- a/knows-java/src/main/java/cn/luckday/bean/KnowsIndex.java
+++ b/knows-java/src/main/java/cn/luckday/bean/KnowsIndex.java
@ -0,0 +1,21 @@
+package cn.luckday.bean;
+
+import lombok.Data;
+
+@Data // Lombok: generates getters, setters, equals/hashCode and toString
+public class KnowsIndex { // one document record; NOTE(review): snake_case names presumably mirror the search-index field names - confirm before renaming
+
+    private String id; // document identifier
+
+    private String file_name; // NOTE(review): presumably the source file's name - confirm against the indexing code
+
+    private String file_path; // NOTE(review): presumably where the source file is stored - confirm
+
+    private String file_type; // NOTE(review): presumably the file extension or MIME type - confirm
+
+    private String file_size; // kept as text; NOTE(review): unit and format are not visible here - confirm
+
+    private String content; // text content; read by KnowsController to build the re-rank input
+
+    private double[] content_vec; // NOTE(review): presumably the embedding vector of content - confirm dimension against the index mapping
+}
--- a/knows-java/src/main/java/cn/luckday/bean/SearchResult.java
+++ b/knows-java/src/main/java/cn/luckday/bean/SearchResult.java
@ -0,0 +1,13 @@
+package cn.luckday.bean;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data // Lombok: generates getters, setters, equals/hashCode and toString
+@AllArgsConstructor // constructor taking (knowsIndex, score)
+@NoArgsConstructor // no-argument constructor
+public class SearchResult { // pairs one matched document with its score
+    private KnowsIndex knowsIndex; // the matched document
+    private Double score; // NOTE(review): presumably the search engine's relevance score - confirm against EsDocumentService
+}
--- a/knows-java/src/main/java/cn/luckday/controller/KnowsController.java
+++ b/knows-java/src/main/java/cn/luckday/controller/KnowsController.java
@ -0,0 +1,112 @@
+package cn.luckday.controller;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.luckday.llm.QwenClient;
+import com.alibaba.dashscope.aigc.generation.GenerationResult;
+import com.alibaba.dashscope.exception.InputRequiredException;
+import com.alibaba.dashscope.exception.NoApiKeyException;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import cn.luckday.bean.SearchResult;
+import cn.luckday.embed.EmbedClient;
+import cn.luckday.embed.ReRankClient;
+import cn.luckday.llm.OllamaClient;
+import cn.luckday.service.EsDocumentService;
+import jakarta.annotation.Resource;
+import jakarta.servlet.http.HttpServletResponse;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * REST endpoints under {@code /knows}: embed a keyword, vector-search the index,
+ * re-rank the hits and hand the re-ranked passages to an LLM client.
+ */
+@Slf4j
+@RestController
+@RequestMapping("/knows")
+public class KnowsController {
+
+    @Value("${embedding.uri}")
+    private String embeddingUri;
+
+    @Value("${embedding.api-key}")
+    private String embeddingApiKey;
+
+    @Value("${re-rank.uri}")
+    private String reRankUri;
+
+    @Value("${re-rank.api-key}")
+    private String reRankApiKey;
+
+    @Value("${oll.uri}")
+    private String ollUri;
+
+    // Must be instance fields: Spring does not inject @Value into static fields,
+    // so the previous static declarations were always null.
+    @Value("${qwen.api-key}")
+    private String apiKey;
+
+    @Value("${qwen.model}")
+    private String model;
+
+    @Resource
+    private EsDocumentService service;
+
+    /**
+     * Embeds the request's {@code keyword} and returns the raw vector-search hits.
+     *
+     * @param dto request body; the {@code keyword} entry is the query text
+     * @return search hits as returned by {@link EsDocumentService#searchVector}
+     * @throws IOException if the embedding or search call fails
+     */
+    @PostMapping("/process")
+    public List<SearchResult> process(@RequestBody Map<String, String> dto) throws IOException {
+        String keyword = dto.get("keyword");
+        return service.searchVector(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, keyword));
+    }
+
+    /**
+     * Retrieves and re-ranks passages for the keyword, then lets {@link OllamaClient}
+     * write the LLM answer to the servlet response.
+     *
+     * @param response servlet response passed to {@link OllamaClient#sendMsg}
+     * @param dto      request body; the {@code keyword} entry is the question
+     */
+    @PostMapping("/generate")
+    public void generate(HttpServletResponse response, @RequestBody Map<String, String> dto) throws IOException, NoApiKeyException, InputRequiredException {
+        String keyword = dto.get("keyword");
+        String passages = retrievePassages(keyword);
+
+        // LLM summarises the answer from the re-ranked passages
+        OllamaClient.sendMsg(response, ollUri, keyword, passages);
+    }
+
+    /**
+     * Retrieves and re-ranks passages for the keyword and returns the Qwen answer text.
+     *
+     * @param dto request body; the {@code keyword} entry is the question
+     * @return content of the first choice in the Qwen generation result
+     */
+    @PostMapping("/qwen-generate")
+    public String qwen(@RequestBody Map<String, String> dto) throws IOException, NoApiKeyException, InputRequiredException {
+        String keyword = dto.get("keyword");
+        String passages = retrievePassages(keyword);
+
+        // LLM summarises the answer from the re-ranked passages
+        GenerationResult result = QwenClient.sendMsg(model, apiKey, keyword, passages);
+        String content = result.getOutput().getChoices().get(0).getMessage().getContent();
+        log.info("千问: {}", content);
+        return content;
+    }
+
+    /**
+     * Shared retrieval pipeline: embed the keyword, vector-search, re-rank the hit contents.
+     *
+     * @param keyword the user's query text
+     * @return string form of the {@code rerank_passages} value from the re-rank response,
+     *         or an empty string when nothing was found or the response lacks that key
+     */
+    private String retrievePassages(String keyword) throws IOException, NoApiKeyException, InputRequiredException {
+        List<SearchResult> searchResults = service.searchVector(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, keyword));
+        if (CollUtil.isEmpty(searchResults)) {
+            log.info("搜索结果searchResults: {} ", List.of());
+            return "";
+        }
+
+        List<String> contents = searchResults.stream().map(searchResult -> searchResult.getKnowsIndex().getContent()).toList();
+        log.info("搜索结果searchResults: {} ", contents);
+
+        // Re-rank; pass a mutable copy because the stream result is unmodifiable
+        String reRank = ReRankClient.reRank(reRankUri, reRankApiKey, new ArrayList<>(contents), keyword);
+        log.info("重排结果reRank: {} ", reRank);
+
+        JSONObject jsonObject = JSON.parseObject(reRank, JSONObject.class);
+        Object reRankPassages = jsonObject == null ? null : jsonObject.get("rerank_passages");
+        // A missing key previously caused a NullPointerException on toString()
+        return reRankPassages == null ? "" : reRankPassages.toString();
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/controller/RedFileController.java
+++ b/knows-java/src/main/java/cn/luckday/controller/RedFileController.java
@ -0,0 +1,22 @@
+package cn.luckday.controller;
+
+import cn.luckday.service.RedFileService;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+import org.springframework.web.multipart.MultipartFile;
+import java.util.*;
+
+@RestController
+@RequestMapping("/api/file")
+public class RedFileController {
+
+  @Autowired
+  private RedFileService redFileService;
+
+  @PostMapping("/upload")
+  public ResponseEntity<?> uploadFile(@RequestParam("file") MultipartFile file) {
+    redFileService.uploadFile(file);
+    return ResponseEntity.ok(Map.of("message", "文件上传并解析成功"));
+  }
+}
--- a/knows-java/src/main/java/cn/luckday/document/Main.java
+++ b/knows-java/src/main/java/cn/luckday/document/Main.java
@ -0,0 +1,60 @@
+package cn.luckday.document;
+
+import java.io.File;
+import java.util.List;
+import java.awt.image.BufferedImage;
+
+/**
+ * Manual command-line driver that runs {@link PDFParser} on one PDF file.
+ */
+public class Main {
+    /** Used when no path is given on the command line. */
+    private static final String DEFAULT_PDF_PATH = "D:\\小红书文档\\中频\\运营经验库\\方法论\\PDF\\评论区和私信的互动指引的方法论.pdf";
+
+    /**
+     * Parses the PDF and prints how much was extracted.
+     *
+     * @param args optional; {@code args[0]} overrides the default PDF path
+     */
+    public static void main(String[] args) {
+        String pdfPath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
+        try {
+            // Verify the file exists before constructing the parser
+            File pdfFile = new File(pdfPath);
+            if (!pdfFile.exists()) {
+                System.err.println("PDF文件不存在: " + pdfPath);
+                return;
+            }
+
+            // parse() reports its own failures, so no extra try/catch is needed around it
+            PDFParser parser = new PDFParser(pdfPath);
+            parser.parse();
+
+            // Collect the results and print a short summary
+            List<String> texts = parser.getExtractedText();
+            List<BufferedImage> images = parser.getExtractedImages();
+            List<Table> tables = parser.getExtractedTables();
+            System.out.println("解析完成: 文本段 " + texts.size()
+                    + ", 图片 " + images.size()
+                    + ", 表格 " + tables.size());
+        } catch (Exception e) {
+            System.err.println("程序执行出错: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/document/OCRProcessor.java
+++ b/knows-java/src/main/java/cn/luckday/document/OCRProcessor.java
@ -0,0 +1,180 @@
+package cn.luckday.document;
+
+import net.sourceforge.tess4j.Tesseract;
+import org.opencv.core.CvType;
+import org.opencv.core.Mat;
+import org.opencv.core.Size;
+import org.opencv.imgproc.Imgproc;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.awt.image.DataBufferByte;
+import org.apache.pdfbox.pdmodel.PDDocument;
+
+/**
+ * Runs Tesseract OCR on images, with optional OpenCV pre-processing
+ * (grayscale, Otsu threshold, morphological close, Gaussian blur).
+ */
+public class OCRProcessor {
+    /** Fallback tessdata directory when the TESSDATA_PREFIX environment variable is unset. */
+    // NOTE(review): developer-machine path; prefer setting TESSDATA_PREFIX in every environment.
+    private static final String DEFAULT_TESSDATA_PATH = "D:\\study\\backend\\embeddingstoes-master\\src\\main\\resources\\ocr";
+
+    /** True when the OpenCV native library was loaded; pre-processing is skipped otherwise. */
+    private static final boolean OPENCV_LOADED = loadOpenCv();
+
+    private final Tesseract tesseract;
+
+    public OCRProcessor() {
+        tesseract = new Tesseract();
+        initializeTesseract();
+    }
+
+    /**
+     * Loads the OpenCV native library from the {@code native/} classpath directory.
+     * Failures, including link errors, are reported and turn pre-processing off
+     * instead of breaking class initialisation.
+     *
+     * @return true if the library was loaded
+     */
+    private static boolean loadOpenCv() {
+        String resourceName = "native/" + System.mapLibraryName("opencv_java4110");
+        try {
+            java.net.URL resource = OCRProcessor.class.getClassLoader().getResource(resourceName);
+            if (resource == null) {
+                System.err.println("未找到OpenCV本地库: " + resourceName);
+                return false;
+            }
+            // Go through URI so URL-encoded characters (spaces, non-ASCII) resolve to a real path
+            System.load(new File(resource.toURI()).getAbsolutePath());
+            return true;
+        } catch (Exception | LinkageError e) {
+            e.printStackTrace();
+            return false;
+        }
+    }
+
+    /**
+     * Configures data path, page segmentation mode, language and output variables.
+     *
+     * @throws RuntimeException if any Tesseract setter fails
+     */
+    private void initializeTesseract() {
+        try {
+            // Tesseract data path: environment variable first, then the fallback
+            String tessdataPath = System.getenv("TESSDATA_PREFIX");
+            if (tessdataPath == null || tessdataPath.isEmpty()) {
+                tessdataPath = DEFAULT_TESSDATA_PATH;
+            }
+
+            tesseract.setDatapath(tessdataPath);
+
+            // Page segmentation mode that does not depend on OSD
+            tesseract.setPageSegMode(3);
+
+            // Language pack
+            tesseract.setLanguage("chi_sim");
+
+            // Turn off extra output artifacts
+            tesseract.setTessVariable("tessedit_create_pdf", "0");
+            tesseract.setTessVariable("tessedit_create_hocr", "0");
+            tesseract.setTessVariable("tessedit_write_images", "0");
+
+        } catch (Exception e) {
+            throw new RuntimeException("Tesseract 初始化失败: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Recognises text in an image.
+     *
+     * @param image image to read; must be non-null and at least 10x10 pixels
+     * @return recognised text, or an empty string if validation or OCR fails
+     */
+    public String performOCR(BufferedImage image) {
+        try {
+            // Basic image validation
+            if (image == null || image.getWidth() < 10 || image.getHeight() < 10) {
+                throw new IllegalArgumentException("无效的图像");
+            }
+
+            BufferedImage processedImage = preprocessImage(image);
+            return tesseract.doOCR(processedImage);
+
+        } catch (Exception e) {
+            System.err.println("OCR处理失败: " + e.getMessage());
+            e.printStackTrace();
+            return "";
+        }
+    }
+
+    /**
+     * Applies the OpenCV pre-processing chain. Returns the input unchanged when OpenCV
+     * is unavailable or a step fails. All native matrices are released before returning.
+     */
+    private BufferedImage preprocessImage(BufferedImage image) {
+        if (!OPENCV_LOADED) {
+            return image;
+        }
+        Mat source = null;
+        Mat gray = null;
+        Mat binary = null;
+        Mat kernel = null;
+        Mat closed = null;
+        Mat smoothed = null;
+        try {
+            source = bufferedImageToMat(image);
+
+            // 1. Convert to grayscale
+            gray = new Mat();
+            Imgproc.cvtColor(source, gray, Imgproc.COLOR_BGR2GRAY);
+
+            // 2. Otsu binarisation
+            binary = new Mat();
+            Imgproc.threshold(gray, binary, 0, 255, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU);
+
+            // 3. Morphological close with a 3x3 rectangular kernel
+            kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3));
+            closed = new Mat();
+            Imgproc.morphologyEx(binary, closed, Imgproc.MORPH_CLOSE, kernel);
+
+            // 4. 3x3 Gaussian blur
+            smoothed = new Mat();
+            Imgproc.GaussianBlur(closed, smoothed, new Size(3, 3), 0);
+
+            return matToBufferedImage(smoothed);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return image;
+        } finally {
+            releaseAll(source, gray, binary, kernel, closed, smoothed);
+        }
+    }
+
+    /** Releases every non-null matrix so native memory is freed promptly. */
+    private static void releaseAll(Mat... mats) {
+        for (Mat mat : mats) {
+            if (mat != null) {
+                mat.release();
+            }
+        }
+    }
+
+    /** Copies the image into a new 8-bit 3-channel matrix, converting to TYPE_3BYTE_BGR first if needed. */
+    private Mat bufferedImageToMat(BufferedImage image) {
+        BufferedImage convertedImage = image;
+        if (image.getType() != BufferedImage.TYPE_3BYTE_BGR) {
+            convertedImage = new BufferedImage(
+                    image.getWidth(),
+                    image.getHeight(),
+                    BufferedImage.TYPE_3BYTE_BGR);
+            java.awt.Graphics graphics = convertedImage.getGraphics();
+            try {
+                graphics.drawImage(image, 0, 0, null);
+            } finally {
+                // Graphics contexts hold system resources until disposed
+                graphics.dispose();
+            }
+        }
+
+        // Raw pixel bytes of the BGR image
+        byte[] pixels = ((DataBufferByte) convertedImage.getRaster().getDataBuffer()).getData();
+
+        Mat mat = new Mat(
+                convertedImage.getHeight(),
+                convertedImage.getWidth(),
+                CvType.CV_8UC3);
+        mat.put(0, 0, pixels);
+
+        return mat;
+    }
+
+    /** Copies a matrix into a new image: TYPE_BYTE_GRAY for one channel, TYPE_3BYTE_BGR otherwise. */
+    private BufferedImage matToBufferedImage(Mat mat) {
+        int type = BufferedImage.TYPE_3BYTE_BGR;
+        if (mat.channels() == 1) {
+            type = BufferedImage.TYPE_BYTE_GRAY;
+        }
+
+        // Read the matrix bytes
+        byte[] pixels = new byte[mat.channels() * mat.cols() * mat.rows()];
+        mat.get(0, 0, pixels);
+
+        BufferedImage image = new BufferedImage(
+                mat.cols(),
+                mat.rows(),
+                type);
+
+        // Write them into the image's backing buffer
+        byte[] targetPixels = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
+        System.arraycopy(pixels, 0, targetPixels, 0, pixels.length);
+
+        return image;
+    }
+
+    /**
+     * Opens the PDF, prints the JVM's maximum heap, and closes the document again.
+     * The per-page processing is not implemented in this method.
+     *
+     * @param pdfPath path of the PDF file to open
+     */
+    public void processPDF(String pdfPath) {
+        // Memory usage monitoring
+        Runtime runtime = Runtime.getRuntime();
+        long maxMemory = runtime.maxMemory() / (1024 * 1024);
+        System.out.println("最大可用内存: " + maxMemory + "MB");
+
+        // try-with-resources closes the document even when processing throws
+        try (PDDocument document = PDDocument.load(new File(pdfPath))) {
+            // ... existing code ...
+        } catch (OutOfMemoryError e) {
+            System.err.println("内存不足: " + e.getMessage());
+            // TODO logging
+        } catch (Exception e) {
+            System.err.println("处理PDF时发生错误: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/document/PDFParser.java
+++ b/knows-java/src/main/java/cn/luckday/document/PDFParser.java
@ -0,0 +1,137 @@
+package cn.luckday.document;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Extracts text and embedded images from one PDF file; each image is also saved
+ * as PNG and passed through OCR, and any recognised text is added to the text list.
+ */
+public class PDFParser {
+    /** Directory, relative to the working directory, that receives the extracted PNG files. */
+    private static final String IMAGE_OUTPUT_DIR = "output_images";
+
+    private final String pdfPath;
+    private PDDocument document;
+    private final OCRProcessor ocrProcessor;
+    private List<String> extractedText;
+    private List<BufferedImage> extractedImages;
+    private List<Table> extractedTables;
+
+    /**
+     * @param pdfPath path of the PDF file to parse
+     */
+    public PDFParser(String pdfPath) {
+        this.pdfPath = pdfPath;
+        this.ocrProcessor = new OCRProcessor();
+        this.extractedText = new ArrayList<>();
+        this.extractedImages = new ArrayList<>();
+        this.extractedTables = new ArrayList<>();
+    }
+
+    /**
+     * Loads the PDF and runs text and image extraction. Failures are printed, not thrown;
+     * whatever was extracted before the failure stays available through the getters.
+     */
+    public void parse() {
+        // try-with-resources closes the document on both the success and the failure path
+        try (PDDocument opened = PDDocument.load(new File(pdfPath))) {
+            document = opened;
+
+            // 1. Text content
+            System.out.println("=== 开始解析文本 ===");
+            extractText();
+
+            // 2. Images
+            System.out.println("\n=== 开始解析图片 ===");
+            extractImages();
+
+            // 3. Tables (currently disabled)
+//            System.out.println("\n=== 开始解析表格 ===");
+//            extractTables();
+        } catch (Exception e) {
+            System.err.println("PDF解析失败: " + e.getMessage());
+            e.printStackTrace();
+        } finally {
+            // Do not keep a reference to a closed document
+            document = null;
+        }
+    }
+
+    /** Extracts the whole document's text with PDFTextStripper and stores it as one entry. */
+    private void extractText() throws IOException {
+        System.out.println("正在提取PDF文本...");
+
+        PDFTextStripper stripper = new PDFTextStripper();
+        String text = stripper.getText(document);
+        System.out.println("文本内容：\n" + text);
+        extractedText.add(text);
+    }
+
+    /**
+     * Saves every image XObject of every page as PNG, runs OCR on it, and records both
+     * the image and any non-blank OCR text.
+     *
+     * @throws IOException if the output directory cannot be created, or decoding or writing fails
+     */
+    private void extractImages() throws IOException {
+        System.out.println("正在提取并处理PDF图片...");
+        int imageCounter = 0;
+
+        // ImageIO.write does not create parent directories
+        File outputDir = new File(IMAGE_OUTPUT_DIR);
+        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
+            throw new IOException("无法创建图片输出目录: " + outputDir.getAbsolutePath());
+        }
+
+        for (PDPage page : document.getPages()) {
+            // A page without a resource dictionary has no images
+            if (page.getResources() == null) {
+                continue;
+            }
+            for (COSName name : page.getResources().getXObjectNames()) {
+                PDXObject object = page.getResources().getXObject(name);
+                if (!(object instanceof PDImageXObject)) {
+                    continue;
+                }
+                PDImageXObject image = (PDImageXObject) object;
+                BufferedImage bImage = image.getImage();
+
+                // Save the image
+                String imagePath = IMAGE_OUTPUT_DIR + "/extracted_image_" + imageCounter + ".png";
+                ImageIO.write(bImage, "PNG", new File(imagePath));
+                System.out.println("已保存图片: " + imagePath);
+
+                // OCR the image; a failure here must not stop the remaining images
+                try {
+                    System.out.println("正在对图片 " + imageCounter + " 进行OCR处理...");
+                    String imageText = ocrProcessor.performOCR(bImage);
+                    if (!imageText.trim().isEmpty()) {
+                        System.out.println("图片 " + imageCounter + " OCR结果：\n" + imageText);
+                        extractedText.add("【图片" + imageCounter + "文本】\n" + imageText);
+                    } else {
+                        System.out.println("图片 " + imageCounter + " 未识别出文本");
+                    }
+                } catch (Exception e) {
+                    System.err.println("处理图片 " + imageCounter + " 时出错: " + e.getMessage());
+                }
+
+                extractedImages.add(bImage);
+                imageCounter++;
+            }
+        }
+        System.out.println("共处理 " + imageCounter + " 张图片");
+    }
+
+    /** Runs TableDetector on the open document and prints what it found. */
+    private void extractTables() {
+        System.out.println("正在提取PDF表格...");
+        TableDetector detector = new TableDetector(document);
+        extractedTables = detector.detectTables();
+
+        if (extractedTables.isEmpty()) {
+            System.out.println("未检测到表格");
+        } else {
+            System.out.println("共检测到 " + extractedTables.size() + " 个表格");
+            for (int i = 0; i < extractedTables.size(); i++) {
+                System.out.println("表格 " + (i + 1) + ":\n" + extractedTables.get(i));
+            }
+        }
+    }
+
+    /** @return document text followed by any OCR text, in extraction order */
+    public List<String> getExtractedText() {
+        return extractedText;
+    }
+
+    /** @return the decoded images, in extraction order */
+    public List<BufferedImage> getExtractedImages() {
+        return extractedImages;
+    }
+
+    /** @return detected tables; empty while table extraction is disabled in parse() */
+    public List<Table> getExtractedTables() {
+        return extractedTables;
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/document/Table.java
+++ b/knows-java/src/main/java/cn/luckday/document/Table.java
@ -0,0 +1,43 @@
+package cn.luckday.document;
+
+public class Table {
+    private String content;
+    private int rows;
+    private int columns;
+
+    public Table(String content) {
+        this.content = content;
+        analyzeStructure();
+    }
+
+    private void analyzeStructure() {
+        if (content == null || content.isEmpty()) {
+            return;
+        }
+
+        // 按行分割内容
+        String[] lines = content.split("\n");
+        rows = lines.length;
+
+        // 分析列数（基于空格或制表符分隔）
+        columns = 0;
+        for (String line : lines) {
+            String[] cells = line.trim().split("\\s+");
+            columns = Math.max(columns, cells.length);
+        }
+    }
+
+    public int getRows() {
+        return rows;
+    }
+
+    public int getColumns() {
+        return columns;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("Table{rows=%d, columns=%d, content='%s'}",
+                rows, columns, content);
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/document/TableDetector.java
+++ b/knows-java/src/main/java/cn/luckday/document/TableDetector.java
@ -0,0 +1,170 @@
+package cn.luckday.document;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.text.PDFTextStripperByArea;
+
+import java.awt.Rectangle;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Heuristic table finder: overlays a fixed grid on each page, measures how much text
+ * falls in every cell, and treats connected groups of moderately filled cells as tables.
+ */
+public class TableDetector {
+    /** Grid resolution used to sample text density on a page. */
+    private static final int GRID_ROWS = 20;
+    private static final int GRID_COLS = 20;
+    /** A cell counts as a table cell when its trimmed text length is in (0, MAX_CELL_DENSITY). */
+    private static final int MAX_CELL_DENSITY = 100;
+
+    private final PDDocument document;
+
+    /**
+     * @param document open PDF document; the caller keeps ownership and closes it
+     */
+    public TableDetector(PDDocument document) {
+        this.document = document;
+    }
+
+    /**
+     * Detects table regions on every page and extracts their text.
+     *
+     * @return tables found so far; on an exception the stack trace is printed and the
+     *         tables collected before the failure are returned
+     */
+    public List<Table> detectTables() {
+        List<Table> tables = new ArrayList<>();
+        try {
+            for (PDPage page : document.getPages()) {
+                List<Rectangle> tableRegions = detectTableRegions(page);
+                if (tableRegions.isEmpty()) {
+                    continue;
+                }
+
+                // Register every region under its own name, then read the page once
+                PDFTextStripperByArea stripper = new PDFTextStripperByArea();
+                stripper.setSortByPosition(true);
+                for (int i = 0; i < tableRegions.size(); i++) {
+                    stripper.addRegion("table_" + i, tableRegions.get(i));
+                }
+                stripper.extractRegions(page);
+
+                for (int i = 0; i < tableRegions.size(); i++) {
+                    tables.add(new Table(stripper.getTextForRegion("table_" + i)));
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return tables;
+    }
+
+    /**
+     * Samples text density on a GRID_ROWS x GRID_COLS grid over the page's media box
+     * and converts the detected cell groups back to page coordinates.
+     */
+    private List<Rectangle> detectTableRegions(PDPage page) {
+        List<Rectangle> regions = new ArrayList<>();
+        try {
+            // Page size
+            float pageHeight = page.getMediaBox().getHeight();
+            float pageWidth = page.getMediaBox().getWidth();
+            float cellHeight = pageHeight / GRID_ROWS;
+            float cellWidth = pageWidth / GRID_COLS;
+
+            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
+            stripper.setSortByPosition(true);
+
+            // Register all grid cells first ...
+            for (int row = 0; row < GRID_ROWS; row++) {
+                for (int col = 0; col < GRID_COLS; col++) {
+                    Rectangle cell = new Rectangle(
+                            (int) (col * cellWidth),
+                            (int) (row * cellHeight),
+                            (int) cellWidth,
+                            (int) cellHeight);
+                    stripper.addRegion("cell_" + row + "_" + col, cell);
+                }
+            }
+
+            // ... then extract once, instead of re-reading the page for every cell
+            stripper.extractRegions(page);
+
+            // Text density = trimmed text length per grid cell
+            int[][] textDensity = new int[GRID_ROWS][GRID_COLS];
+            for (int row = 0; row < GRID_ROWS; row++) {
+                for (int col = 0; col < GRID_COLS; col++) {
+                    String cellText = stripper.getTextForRegion("cell_" + row + "_" + col);
+                    textDensity[row][col] = cellText.trim().length();
+                }
+            }
+
+            List<TableRegion> potentialTables = findPotentialTables(textDensity, GRID_ROWS, GRID_COLS);
+
+            // Convert grid bounds to page coordinates
+            for (TableRegion tableRegion : potentialTables) {
+                Rectangle rect = new Rectangle(
+                        (int) (tableRegion.startCol * cellWidth),
+                        (int) (tableRegion.startRow * cellHeight),
+                        (int) ((tableRegion.endCol - tableRegion.startCol + 1) * cellWidth),
+                        (int) ((tableRegion.endRow - tableRegion.startRow + 1) * cellHeight));
+                regions.add(rect);
+            }
+
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return regions;
+    }
+
+    /** Flood-fills connected table-like cells and keeps the groups that pass isValidTable. */
+    private List<TableRegion> findPotentialTables(int[][] textDensity, int rows, int cols) {
+        List<TableRegion> tables = new ArrayList<>();
+        boolean[][] visited = new boolean[rows][cols];
+
+        for (int i = 0; i < rows; i++) {
+            for (int j = 0; j < cols; j++) {
+                if (!visited[i][j] && isTableCell(textDensity, i, j)) {
+                    TableRegion region = new TableRegion();
+                    expandTableRegion(textDensity, visited, i, j, region);
+                    if (isValidTable(region)) {
+                        tables.add(region);
+                    }
+                }
+            }
+        }
+        return tables;
+    }
+
+    /** A cell is table-like when it holds some text but fewer than MAX_CELL_DENSITY characters. */
+    private boolean isTableCell(int[][] density, int row, int col) {
+        int cellDensity = density[row][col];
+        return cellDensity > 0 && cellDensity < MAX_CELL_DENSITY; // tunable threshold
+    }
+
+    /** Recursive 4-neighbour flood fill that grows the region's bounding box. */
+    private void expandTableRegion(int[][] density, boolean[][] visited,
+                                   int row, int col, TableRegion region) {
+        if (row < 0 || row >= density.length ||
+                col < 0 || col >= density[0].length ||
+                visited[row][col] ||
+                !isTableCell(density, row, col)) {
+            return;
+        }
+
+        visited[row][col] = true;
+
+        // Grow the bounding box
+        region.updateBounds(row, col);
+
+        // Visit the four neighbours
+        expandTableRegion(density, visited, row - 1, col, region); // up
+        expandTableRegion(density, visited, row + 1, col, region); // down
+        expandTableRegion(density, visited, row, col - 1, region); // left
+        expandTableRegion(density, visited, row, col + 1, region); // right
+    }
+
+    /** A region qualifies as a table only if it spans at least 2x2 grid cells. */
+    private boolean isValidTable(TableRegion region) {
+        int width = region.endCol - region.startCol + 1;
+        int height = region.endRow - region.startRow + 1;
+
+        return width >= 2 && height >= 2;
+    }
+
+    /** Inclusive bounding box of a cell group, in grid coordinates. */
+    private static class TableRegion {
+        int startRow = Integer.MAX_VALUE;
+        int startCol = Integer.MAX_VALUE;
+        int endRow = Integer.MIN_VALUE;
+        int endCol = Integer.MIN_VALUE;
+
+        void updateBounds(int row, int col) {
+            startRow = Math.min(startRow, row);
+            startCol = Math.min(startCol, col);
+            endRow = Math.max(endRow, row);
+            endCol = Math.max(endCol, col);
+        }
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/document/WordProcessor.java
+++ b/knows-java/src/main/java/cn/luckday/document/WordProcessor.java
@ -0,0 +1,287 @@
+package cn.luckday.document;
+
+import org.apache.poi.xwpf.usermodel.*;
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import org.apache.poi.common.usermodel.PictureType;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Extracts text, tables and pictures from Word documents.
+ *
+ * <p>{@code .docx} files are read through XWPF and {@code .doc} files through HWPF.
+ * Call {@link #process()} first, then read the results through the getters.
+ */
+public class WordProcessor {
+    private final String filePath;
+    private final List<String> extractedText;
+    private final List<XWPFTable> extractedTables;
+    private final List<XWPFPicture> extractedImages;
+
+    /**
+     * @param filePath path of the Word document to process
+     */
+    public WordProcessor(String filePath) {
+        this.filePath = filePath;
+        this.extractedText = new ArrayList<>();
+        this.extractedTables = new ArrayList<>();
+        this.extractedImages = new ArrayList<>();
+    }
+
+    /**
+     * Parses the document and fills the extracted text/table/image lists.
+     * I/O failures while reading are printed and otherwise ignored.
+     *
+     * @throws IllegalArgumentException if the path ends with neither .docx nor .doc
+     */
+    public void process() {
+        File file = new File(filePath);
+        // Match the extension case-insensitively so "REPORT.DOCX" is accepted as well.
+        String lowerPath = filePath.toLowerCase(java.util.Locale.ROOT);
+        if (lowerPath.endsWith(".docx")) {
+            processDocx(file);
+        } else if (lowerPath.endsWith(".doc")) {
+            processDoc(file);
+        } else {
+            throw new IllegalArgumentException("不支持的文件格式：" + filePath);
+        }
+    }
+
+    /** Reads a .docx file: paragraphs, then tables, then embedded pictures. */
+    private void processDocx(File file) {
+        try (FileInputStream fis = new FileInputStream(file);
+             XWPFDocument document = new XWPFDocument(fis)) {
+
+            // Extract text
+            extractTextFromDocx(document);
+
+            // Extract tables
+            extractTablesFromDocx(document);
+
+            // Extract pictures
+            extractImagesFromDocx(document);
+
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** Reads a legacy .doc file: text, then tables, then pictures. */
+    private void processDoc(File file) {
+        try (FileInputStream fis = new FileInputStream(file);
+             POIFSFileSystem fs = new POIFSFileSystem(fis)) {
+
+            HWPFDocument document = new HWPFDocument(fs);
+
+            // Extract text
+            Range range = document.getRange();
+            extractTextFromDoc(range);
+
+            // Extract tables
+            extractTablesFromDoc(range);
+
+            // Extract pictures (where possible)
+            extractImagesFromDoc(document);
+
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** Adds the trimmed text of every non-empty paragraph to the extracted text. */
+    private void extractTextFromDocx(XWPFDocument document) {
+        for (XWPFParagraph paragraph : document.getParagraphs()) {
+            String text = paragraph.getText().trim();
+            if (!text.isEmpty()) {
+                extractedText.add(text);
+            }
+        }
+    }
+
+    /**
+     * Records every table of the document and appends one tab-separated
+     * "表格行：" line per table row to the extracted text.
+     */
+    private void extractTablesFromDocx(XWPFDocument document) {
+        for (XWPFTable table : document.getTables()) {
+            extractedTables.add(table);
+
+            // Flatten the table content into text lines
+            for (XWPFTableRow row : table.getRows()) {
+                StringBuilder rowContent = new StringBuilder();
+                for (XWPFTableCell cell : row.getTableCells()) {
+                    rowContent.append(cell.getText()).append("\t");
+                }
+                extractedText.add("表格行：" + rowContent.toString().trim());
+            }
+        }
+    }
+
+    /** Collects the pictures embedded in the runs of the document's paragraphs. */
+    private void extractImagesFromDocx(XWPFDocument document) {
+        for (XWPFParagraph paragraph : document.getParagraphs()) {
+            for (XWPFRun run : paragraph.getRuns()) {
+                List<XWPFPicture> pictures = run.getEmbeddedPictures();
+                extractedImages.addAll(pictures);
+            }
+        }
+    }
+
+    /** Splits the range text on line breaks and keeps the trimmed non-empty pieces. */
+    private void extractTextFromDoc(Range range) {
+        String text = range.text();
+        // Split into paragraphs
+        String[] paragraphs = text.split("\\r?\\n");
+        for (String paragraph : paragraphs) {
+            if (!paragraph.trim().isEmpty()) {
+                extractedText.add(paragraph.trim());
+            }
+        }
+    }
+
+    /** Finds each table in the range and converts its rows to text lines. */
+    private void extractTablesFromDoc(Range range) {
+        for (int i = 0; i < range.numParagraphs(); i++) {
+            if (range.getParagraph(i).isInTable()) {
+                Table table = range.getTable(range.getParagraph(i));
+                processDocTable(table);
+                // Skip the remaining paragraphs that belong to this table
+                i += table.numParagraphs() - 1;
+            }
+        }
+    }
+
+    /** Appends one tab-separated "表格行：" line per row of a .doc table. */
+    private void processDocTable(Table table) {
+        for (int rowIdx = 0; rowIdx < table.numRows(); rowIdx++) {
+            TableRow row = table.getRow(rowIdx);
+            List<String> rowData = new ArrayList<>();
+
+            for (int colIdx = 0; colIdx < row.numCells(); colIdx++) {
+                TableCell cell = row.getCell(colIdx);
+                String cellText = cell.text().trim();
+                // Defensive: drop a trailing \u0007 (presumably the HWPF cell-end marker).
+                if (cellText.endsWith("\u0007")) {
+                    cellText = cellText.substring(0, cellText.length() - 1);
+                }
+                rowData.add(cellText);
+            }
+
+            extractedText.add("表格行：" + String.join("\t", rowData));
+        }
+    }
+
+    /**
+     * Writes every picture of a .doc file into the "output_images" directory
+     * (relative to the working directory). Failures only produce a warning.
+     */
+    private void extractImagesFromDoc(HWPFDocument document) {
+        // Note: HWPF has limited picture support
+        try {
+            List<org.apache.poi.hwpf.usermodel.Picture> pictures = document.getPicturesTable().getAllPictures();
+            File outputDir = new File("output_images");
+            if (!outputDir.exists()) {
+                outputDir.mkdirs();
+            }
+
+            int imageCounter = 0;
+            for (org.apache.poi.hwpf.usermodel.Picture picture : pictures) {
+                String extension = picture.suggestFileExtension();
+                String filename = String.format("doc_image_%d.%s", imageCounter++, extension);
+                Path outputPath = Paths.get(outputDir.getPath(), filename);
+
+                // Persist the picture bytes
+                Files.write(outputPath, picture.getContent());
+            }
+        } catch (Exception e) {
+            System.out.println("警告：提取.doc文件中的图片时出错：" + e.getMessage());
+        }
+    }
+
+    /**
+     * Writes the pictures collected from a .docx file as image_N.ext files.
+     * I/O failures are printed and otherwise ignored.
+     *
+     * @param outputDir target directory; created when missing
+     */
+    public void saveImages(String outputDir) {
+        try {
+            File dir = new File(outputDir);
+            if (!dir.exists()) {
+                dir.mkdirs();
+            }
+
+            int imageCounter = 0;
+            for (XWPFPicture picture : extractedImages) {
+                // Raw picture bytes
+                byte[] pictureData = picture.getPictureData().getData();
+
+                // File extension derived from the picture type
+                String extension = getImageExtension(picture.getPictureData().getPictureType());
+                String filename = String.format("image_%d.%s", imageCounter++, extension);
+
+                // Write the picture
+                Path outputPath = Paths.get(dir.getPath(), filename);
+                Files.write(outputPath, pictureData);
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Maps a picture type id to a file extension.
+     *
+     * @param pictureType id compared against the PictureType OOXML ids
+     * @return the matching extension, or "unknown" when no type matches
+     */
+    private String getImageExtension(int pictureType) {
+        if (pictureType == PictureType.PNG.getOoxmlId()) {
+            return "png";
+        } else if (pictureType == PictureType.JPEG.getOoxmlId()) {
+            return "jpg";
+        } else if (pictureType == PictureType.GIF.getOoxmlId()) {
+            return "gif";
+        } else if (pictureType == PictureType.TIFF.getOoxmlId()) {
+            return "tiff";
+        } else if (pictureType == PictureType.BMP.getOoxmlId()) {
+            return "bmp";
+        } else if (pictureType == PictureType.EMF.getOoxmlId()) {
+            return "emf";
+        } else if (pictureType == PictureType.WMF.getOoxmlId()) {
+            return "wmf";
+        } else if (pictureType == PictureType.PICT.getOoxmlId()) {
+            return "pict";
+        } else if (pictureType == PictureType.DIB.getOoxmlId()) {
+            return "dib";
+        } else {
+            return "unknown";
+        }
+    }
+
+    /** @return the extracted text lines (paragraphs and flattened table rows) */
+    public List<String> getExtractedText() {
+        return extractedText;
+    }
+
+    /** @return the tables found in a .docx document */
+    public List<XWPFTable> getExtractedTables() {
+        return extractedTables;
+    }
+
+    /** @return the pictures found in a .docx document */
+    public List<XWPFPicture> getExtractedImages() {
+        return extractedImages;
+    }
+
+    /**
+     * Converts a table into rows of trimmed cell texts.
+     *
+     * @param table table to convert
+     * @return one list per row, each holding the trimmed text of its cells
+     */
+    public List<List<String>> convertTableToList(XWPFTable table) {
+        List<List<String>> tableData = new ArrayList<>();
+
+        for (XWPFTableRow row : table.getRows()) {
+            List<String> rowData = new ArrayList<>();
+            for (XWPFTableCell cell : row.getTableCells()) {
+                rowData.add(cell.getText().trim());
+            }
+            tableData.add(rowData);
+        }
+
+        return tableData;
+    }
+
+    /**
+     * Writes a table as a UTF-8 CSV file with every field quoted.
+     * I/O failures are printed and otherwise ignored.
+     *
+     * @param table      table to export
+     * @param outputPath path of the CSV file to write
+     */
+    public void exportTableToCSV(XWPFTable table, String outputPath) {
+        try {
+            StringBuilder csv = new StringBuilder();
+
+            for (XWPFTableRow row : table.getRows()) {
+                List<String> rowData = new ArrayList<>();
+                for (XWPFTableCell cell : row.getTableCells()) {
+                    // Inside a quoted field only the quote character needs escaping
+                    // (by doubling it). Commas must stay untouched: rewriting them as
+                    // "," would split one cell into several columns.
+                    String cellText = cell.getText().trim().replace("\"", "\"\"");
+                    rowData.add("\"" + cellText + "\"");
+                }
+                csv.append(String.join(",", rowData)).append("\n");
+            }
+
+            // Explicit charset: the platform default is not guaranteed to be UTF-8.
+            Files.write(
+                    new File(outputPath).toPath(),
+                    csv.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
+
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/embed/EmbedClient.java
+++ b/knows-java/src/main/java/cn/luckday/embed/EmbedClient.java
@ -0,0 +1,83 @@
+package cn.luckday.embed;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import okhttp3.*;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Minimal HTTP client for the embedding service.
+ */
+public class EmbedClient {
+
+    // One shared client so connections and threads are reused across calls
+    // instead of building a new pool for every request.
+    private static final OkHttpClient CLIENT = new OkHttpClient();
+
+    private static final MediaType JSON_MEDIA_TYPE = MediaType.parse("application/json; charset=utf-8");
+
+    /**
+     * Requests the embedding vector for a single text.
+     *
+     * @param uri       embedding endpoint
+     * @param apiKey    key sent as a Bearer token
+     * @param inputText text to embed; sent as a one-element "input" list
+     * @return the embedding of the first entry in the response's "data" list
+     * @throws IOException on a non-successful HTTP status, a missing body,
+     *                     or a response that contains no embedding
+     */
+    public static double[] getEmbedding(String uri, String apiKey, String inputText) throws IOException {
+        // Build the request body
+        JSONObject requestBody = new JSONObject();
+        requestBody.put("input", Collections.singletonList(inputText));
+
+        // Build the request
+        RequestBody body = RequestBody.Companion.create(requestBody.toJSONString(), JSON_MEDIA_TYPE);
+        Request request = new Request.Builder()
+                .url(uri)
+                .addHeader("Authorization", "Bearer " + apiKey)
+                .addHeader("Content-Type", "application/json")
+                .post(body)
+                .build();
+
+        // Send the request; try-with-resources releases the connection on every path
+        try (Response response = CLIENT.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                throw new IOException("Unexpected code " + response);
+            }
+
+            ResponseBody responseBody = response.body();
+            if (responseBody == null) {
+                throw new IOException("Empty response body from " + uri);
+            }
+
+            // Parse the JSON response
+            EmbeddingResponse embeddingResponse = JSON.parseObject(responseBody.string(), EmbeddingResponse.class);
+            if (embeddingResponse == null
+                    || embeddingResponse.getData() == null
+                    || embeddingResponse.getData().isEmpty()
+                    || embeddingResponse.getData().get(0).getEmbedding() == null) {
+                throw new IOException("Embedding response contains no embedding: " + uri);
+            }
+
+            // Return the embedding vector
+            return embeddingResponse.getData().get(0).getEmbedding();
+        }
+    }
+
+    /** Response envelope: a list of embedding entries under "data". */
+    static class EmbeddingResponse {
+        private List<Data> data;
+
+        public List<Data> getData() {
+            return data;
+        }
+
+        public void setData(List<Data> data) {
+            this.data = data;
+        }
+    }
+
+    /** One entry of the response's "data" list. */
+    static class Data {
+        private double[] embedding;
+        private int index;
+        private String object;
+
+        public double[] getEmbedding() {
+            return embedding;
+        }
+
+        public void setEmbedding(double[] embedding) {
+            this.embedding = embedding;
+        }
+
+        public int getIndex() {
+            return index;
+        }
+
+        public void setIndex(int index) {
+            this.index = index;
+        }
+
+        public String getObject() {
+            return object;
+        }
+
+        public void setObject(String object) {
+            this.object = object;
+        }
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/embed/ReRankClient.java
+++ b/knows-java/src/main/java/cn/luckday/embed/ReRankClient.java
@ -0,0 +1,34 @@
+package cn.luckday.embed;
+
+import com.alibaba.fastjson.JSONObject;
+import okhttp3.*;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Minimal HTTP client for the re-rank service.
+ */
+public class ReRankClient {
+
+    // One shared client so connections and threads are reused across calls
+    // instead of building a new pool for every request.
+    private static final OkHttpClient CLIENT = new OkHttpClient();
+
+    private static final MediaType JSON_MEDIA_TYPE = MediaType.parse("application/json; charset=utf-8");
+
+    /**
+     * Posts the candidate texts and the query to the re-rank endpoint.
+     *
+     * @param uri       re-rank endpoint
+     * @param apiKey    key sent as a Bearer token
+     * @param textsList candidate texts, sent as "textList"
+     * @param query     query text, sent as "query"
+     * @return the raw response body
+     * @throws IOException on a non-successful HTTP status or a missing body
+     */
+    public static String reRank(String uri, String apiKey, List<String> textsList, String query) throws IOException {
+        JSONObject requestBody = new JSONObject();
+        String[] texts = textsList.toArray(new String[0]);
+        requestBody.put("textList", texts);
+        requestBody.put("query", query);
+
+        // Build the request
+        RequestBody body = RequestBody.Companion.create(requestBody.toJSONString(), JSON_MEDIA_TYPE);
+        Request request = new Request.Builder()
+                .url(uri)
+                .addHeader("Authorization", "Bearer " + apiKey)
+                .addHeader("Content-Type", "application/json")
+                .post(body)
+                .build();
+
+        // Send the request; try-with-resources releases the connection on every path
+        try (Response response = CLIENT.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                throw new IOException("Unexpected code " + response);
+            }
+            ResponseBody responseBody = response.body();
+            if (responseBody == null) {
+                throw new IOException("Empty response body from " + uri);
+            }
+            return responseBody.string();
+        }
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/filter/AccessControlFilter.java
+++ b/knows-java/src/main/java/cn/luckday/filter/AccessControlFilter.java
@ -0,0 +1,46 @@
+package cn.luckday.filter;
+
+import jakarta.servlet.*;
+import jakarta.servlet.annotation.WebFilter;
+import jakarta.servlet.http.HttpServletRequest;
+import jakarta.servlet.http.HttpServletResponse;
+import org.springframework.core.annotation.Order;
+import org.springframework.stereotype.Component;
+
+import java.io.IOException;
+
+/**
+ * Adds permissive CORS headers to every response and answers preflight
+ * (OPTIONS) requests directly without invoking the rest of the chain.
+ */
+@Component
+@WebFilter(urlPatterns = "/*", asyncSupported = true)
+@Order(1)
+public class AccessControlFilter implements Filter {
+
+    @Override
+    public void init(FilterConfig filterConfig) throws ServletException {
+    }
+
+    /**
+     * Sets the CORS response headers, then either finishes a preflight request
+     * with 200 or passes the request down the chain.
+     */
+    @Override
+    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException {
+        HttpServletRequest httpServletRequest = (HttpServletRequest) request;
+        HttpServletResponse httpServletResponse = (HttpServletResponse) response;
+
+        httpServletResponse.setHeader("Access-Control-Allow-Origin", "*");
+        httpServletResponse.setHeader("Access-Control-Allow-Headers", "Content-Type,Content-Length, Authorization, Accept,X-Requested-With,cors, content-type, luck-token, userId, user, type");
+        // "Access-Control-Allow-Credentials: true" is intentionally not sent: browsers
+        // reject credentialed requests when the allowed origin is the "*" wildcard, so
+        // the combination never took effect.
+        // NOTE(review): if cookie/credentialed requests are required, echo a vetted
+        // Origin from an allow-list (plus "Vary: Origin") instead of using "*".
+        httpServletResponse.setHeader("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,PATCH,OPTIONS");
+        httpServletResponse.setHeader("Access-Control-Max-Age", "3600");
+
+        if ("OPTIONS".equals(httpServletRequest.getMethod())) {
+            // Preflight: the headers above are the whole answer.
+            httpServletResponse.setStatus(HttpServletResponse.SC_OK);
+        } else {
+            chain.doFilter(request, response);
+        }
+    }
+
+    @Override
+    public void destroy() {
+
+    }
+
+}
--- a/knows-java/src/main/java/cn/luckday/llm/OllamaClient.java
+++ b/knows-java/src/main/java/cn/luckday/llm/OllamaClient.java
@ -0,0 +1,85 @@
+package cn.luckday.llm;
+
+import com.alibaba.fastjson2.JSON;
+import jakarta.servlet.http.HttpServletResponse;
+
+import java.io.*;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.HashMap;
+
+public class OllamaClient {
+
+    private static Map<String, Object> PARAMS = new HashMap<>();
+    private static Map<String, Object> OPTIONS = new HashMap<>();
+
+    static {
+        OPTIONS.put("temperature", 0.3);     // # 控制随机性（0-1，值越大越随机）
+        OPTIONS.put("top_p", 0.5);           // # 采样策略（0-1，值越小越集中）
+        OPTIONS.put("max_tokens", 1024);     // # 生成的最大 token 数
+
+        PARAMS.put("model", "deepseek-r1:32b");
+        PARAMS.put("stream", true);
+        PARAMS.put("options", OPTIONS);
+    }
+
+    public static String PROMPT = "你是一个知识库，必须严格按照知识库检索的内容做最精简的回答，只回答关键信息，坚决杜绝胡编乱造，注意字数。" +
+            "当所有知识库内容都与产品问题无关时，或者知识库检索到任何相关信息时，你的回答必须是“没有找到”这句话。" +
+            "        以下是知识库：\n" +
+            "        { %content% }\n" +
+            "        以上是知识库。 \n 以下是提问：";
+
+    public static void sendMsg(HttpServletResponse response, String uri, String query, String content) {
+        try {
+            // 设置SSE必要的响应头
+            response.setContentType("text/event-stream");
+            response.setCharacterEncoding("UTF-8");
+            response.setHeader("Cache-Control", "no-cache");
+            response.setHeader("Connection", "keep-alive");
+
+            URL url = new URL(uri);
+            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+            conn.setRequestMethod("POST");
+            conn.setRequestProperty("Accept", "text/event-stream");
+            conn.setRequestProperty("Content-Type", "application/json");
+            conn.setDoOutput(true);
+
+            PARAMS.put("prompt", PROMPT.replace("%content%", content) + query);
+            String json = JSON.toJSONString(PARAMS);
+
+            try (OutputStream os = conn.getOutputStream()) {
+                os.write(json.getBytes(StandardCharsets.UTF_8));
+            }
+
+            int responseCode = conn.getResponseCode();
+
+            if (responseCode >= HttpURLConnection.HTTP_OK && responseCode < HttpURLConnection.HTTP_USE_PROXY) {
+                try (BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8));
+                     PrintWriter writer = response.getWriter()) {
+
+                    String line;
+                    while ((line = br.readLine()) != null) {
+                        if (!line.trim().isEmpty()) {
+                            // 构造SSE消息格式
+                            writer.write("data: " + line + "\n\n");
+                            writer.flush();
+                        }
+                    }
+                }
+            } else {
+                throw new RuntimeException("Failed : HTTP error code : " + responseCode);
+            }
+        } catch (Exception e) {
+            try {
+                response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+                PrintWriter writer = response.getWriter();
+                writer.write("data: {\"error\": \"" + e.getMessage() + "\"}\n\n");
+                writer.flush();
+            } catch (IOException ioe) {
+                e.printStackTrace();
+            }
+        }
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/llm/QwenClient.java
+++ b/knows-java/src/main/java/cn/luckday/llm/QwenClient.java
@ -0,0 +1,45 @@
+package cn.luckday.llm;
+
+import java.util.Arrays;
+import com.alibaba.dashscope.aigc.generation.Generation;
+import com.alibaba.dashscope.aigc.generation.GenerationParam;
+import com.alibaba.dashscope.aigc.generation.GenerationResult;
+import com.alibaba.dashscope.common.Message;
+import com.alibaba.dashscope.common.Role;
+import com.alibaba.dashscope.exception.ApiException;
+import com.alibaba.dashscope.exception.InputRequiredException;
+import com.alibaba.dashscope.exception.NoApiKeyException;
+
+/**
+ * Sends a knowledge-base question to a DashScope generation model.
+ */
+public class QwenClient {
+
+    /**
+     * Builds a system message containing the knowledge-base text and a user
+     * message containing the question, then calls the model.
+     *
+     * @param model   model name passed to the generation request
+     * @param apiKey  API key passed to the generation request
+     * @param query   user question
+     * @param content knowledge-base text embedded in the system prompt
+     * @return the generation result returned by the SDK
+     */
+    public static GenerationResult sendMsg(String model, String apiKey, String query, String content) throws ApiException, NoApiKeyException, InputRequiredException {
+        Generation gen = new Generation();
+
+        // The fallback clause reads "no relevant information was retrieved"; the
+        // previous wording lacked the negation and told the model to answer
+        // "没有找到" whenever relevant information WAS found.
+        Message systemMsg = Message.builder()
+                .role(Role.SYSTEM.getValue())
+                .content("你是一个知识库，必须严格按照知识库检索的内容做最精简的回答，只回答关键信息，坚决杜绝胡编乱造，注意数字。" +
+                        "当所有知识库内容都与产品问题无关时，或者知识库没有检索到任何相关信息时，你的回答必须是“没有找到”这句话。" +
+                        "        以下是知识库：\n" +
+                        "        {" + content + "}\n" +
+                        "        以上是知识库。")
+                .build();
+
+        Message userMsg = Message.builder()
+                .role(Role.USER.getValue())
+                .content(query)
+                .build();
+
+        GenerationParam param = GenerationParam.builder()
+                .model(model)
+                .messages(Arrays.asList(systemMsg, userMsg))
+                .resultFormat(GenerationParam.ResultFormat.MESSAGE)
+                .apiKey(apiKey)
+                .topK(50)
+                .temperature(0.1f)
+                .topP(0.8)
+                .seed(1234)
+                .build();
+
+        return gen.call(param);
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/service/EsDocumentService.java
+++ b/knows-java/src/main/java/cn/luckday/service/EsDocumentService.java
@ -0,0 +1,143 @@
+package cn.luckday.service;
+
+
+import co.elastic.clients.elasticsearch.ElasticsearchClient;
+import co.elastic.clients.elasticsearch._types.Script;
+import co.elastic.clients.elasticsearch._types.query_dsl.*;
+import co.elastic.clients.elasticsearch.core.IndexResponse;
+import co.elastic.clients.elasticsearch.core.SearchResponse;
+import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
+import co.elastic.clients.elasticsearch.indices.CreateIndexResponse;
+import co.elastic.clients.json.JsonData;
+import cn.luckday.bean.SearchResult;
+import cn.luckday.bean.KnowsIndex;
+import cn.luckday.embed.EmbedClient;
+import jakarta.annotation.Resource;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * Creates the knowledge index, indexes documents together with their
+ * embeddings, and runs vector-similarity searches against it.
+ */
+@Slf4j
+@Service
+public class EsDocumentService {
+
+    @Value("${embedding.uri}")
+    private String embeddingUri;
+
+    @Value("${embedding.api-key}")
+    private String embeddingApiKey;
+
+    @Resource
+    private ElasticsearchClient client;
+
+    public static final String INDEX_NAME = "knows_index";
+
+    // Minimum similarity score a hit needs to be returned by searchVector.
+    public static final float SIMILARITY_THRESHOLD  = 0.2f;
+
+    /**
+     * Creates the index with its field mappings.
+     *
+     * @throws IOException if the request to Elasticsearch fails
+     */
+    public void createIndex() throws IOException {
+        CreateIndexRequest request = new CreateIndexRequest.Builder()
+                .index(INDEX_NAME)
+
+                .mappings(m -> m
+                        .properties("file_name", p -> p.keyword(k -> k))
+                        .properties("file_path", p -> p.keyword(k -> k))
+                        .properties("file_type", p -> p.keyword(k -> k))
+                        .properties("file_size", p -> p.keyword(k -> k))
+                        // indexSellList writes the embedding through setContent_vec and
+                        // searchVector scores on 'content_vec', so that field needs an
+                        // explicit dense_vector mapping (previously only remark_vec had one).
+                        // NOTE(review): dims copied from remark_vec; confirm it matches the
+                        // embedding model's output size.
+                        .properties("content_vec", p -> p
+                                .denseVector(dv -> dv
+                                        .dims(1024)
+                                        .index(true)
+                                        .similarity("cosine")
+                                )
+                        )
+                        .properties("content", p -> p
+                                .text(t -> t)
+                        )
+                        // Kept for backward compatibility with the original mapping.
+                        .properties("remark_vec", p -> p
+                                .denseVector(dv -> dv
+                                        .dims(1024)
+                                        .index(true)
+                                        .similarity("cosine")
+                                )
+                        )
+                        .properties("remark", p -> p
+                                .text(t -> t)
+                        )
+//                        .properties("remark", p -> p
+//                                .text(t -> t.searchAnalyzer("ik_smart")
+//                                        .analyzer("ik_smart") // use the IK analyzer
+//                                )
+//                        )
+                )
+                .build();
+
+        CreateIndexResponse createIndexResponse = client.indices().create(request);
+        log.info("Index created: {}", createIndexResponse.acknowledged());
+    }
+
+    /**
+     * Computes the embedding of each entry's content and indexes the entry.
+     *
+     * @param knowsIndexList entries to index
+     * @throws IOException if the embedding call or the index request fails
+     */
+    public void indexSellList(List<KnowsIndex> knowsIndexList) throws IOException {
+        for (KnowsIndex knowsIndex : knowsIndexList) {
+            knowsIndex.setContent_vec(EmbedClient.getEmbedding(embeddingUri, embeddingApiKey, knowsIndex.getContent()));
+            IndexResponse response = client.index(i -> i
+                    .index(INDEX_NAME)
+                    .id(knowsIndex.getId())
+                    .document(knowsIndex)
+            );
+            log.info("Sell indexed: {}", response.id());
+        }
+    }
+
+
+    /**
+     * Searches the index by cosine similarity to the given vector.
+     *
+     * @param queryVector query embedding
+     * @return hits scoring at least SIMILARITY_THRESHOLD, highest score first
+     * @throws IOException if the search request fails
+     */
+    public List<SearchResult> searchVector(double[] queryVector) throws IOException {
+        // Vector-similarity query
+        ScriptScoreQuery scriptScoreQuery = ScriptScoreQuery.of(q -> q
+                .query(QueryBuilders.matchAll().build()._toQuery())
+                .script(Script.of(s -> s.inline(i -> i
+                        .source("double score = cosineSimilarity(params.query_vector, 'content_vec'); " +
+                                "score = Math.min(1.0, Math.max(0.0, score)); " + // clamp the score to [0, 1]
+                                "if (score < params.threshold) { return 0; } else { return score; }")
+                        .params(Map.of(
+                                "query_vector", JsonData.of(queryVector),
+                                "threshold", JsonData.of(SIMILARITY_THRESHOLD) // threshold handed to the script as a parameter
+                        ))))));
+
+        // Bool query with the vector-similarity query as its should clause
+        Query boolQuery = QueryBuilders.bool(b -> b
+                .should(scriptScoreQuery._toQuery())
+        );
+
+        Query functionScoreQuery = QueryBuilders.functionScore(fs -> fs
+                .query(boolQuery)
+                .scoreMode(FunctionScoreMode.Max)
+                .boostMode(FunctionBoostMode.Replace)
+                .minScore((double) SIMILARITY_THRESHOLD)
+        );
+
+        // Run the combined query
+        SearchResponse<KnowsIndex> combinedSearchResponse = client.search(s -> s
+                        .index(INDEX_NAME)
+                        .query(functionScoreQuery),
+                KnowsIndex.class);
+
+        // Convert hits, drop those below the threshold, sort by score descending
+        return combinedSearchResponse.hits().hits().stream()
+                .map(hit -> {
+                    double finalScore = Objects.nonNull(hit.score()) ? hit.score() : 0.0;
+                    return finalScore >= SIMILARITY_THRESHOLD ? new SearchResult(hit.source(), finalScore) : null;
+                })
+                .filter(Objects::nonNull)
+                .sorted(Comparator.comparingDouble(SearchResult::getScore).reversed())
+                .collect(Collectors.toList());
+    }
+}
--- a/knows-java/src/main/java/cn/luckday/service/RedFileService.java
+++ b/knows-java/src/main/java/cn/luckday/service/RedFileService.java
@ -0,0 +1,135 @@
+package cn.luckday.service;
+
+import cn.hutool.core.util.IdUtil;
+import cn.luckday.bean.KnowsIndex;
+import cn.luckday.embed.EmbedClient;
+import cn.luckday.document.PDFParser;
+import cn.luckday.document.WordProcessor;
+import jakarta.annotation.Resource;
+import org.apache.poi.xwpf.usermodel.XWPFPicture;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Receives uploaded documents, extracts their text and indexes it.
+ */
+@Service
+public class RedFileService {
+    private static final String TEMP_DIR = "src/main/resources/temp_uploads";
+
+    @Resource
+    private EsDocumentService esDocumentService;
+
+    /**
+     * Stores the upload in a temporary file, parses it, indexes the extracted
+     * text and removes the temporary file again. Failures are printed and
+     * otherwise ignored.
+     *
+     * @param file uploaded document (pdf, docx or doc)
+     */
+    public void uploadFile(MultipartFile file) {
+        Path tempFilePath = null;
+        try {
+            String projectPath = System.getProperty("user.dir");
+            Path tempDirPath = Paths.get(projectPath, TEMP_DIR);
+            if (!Files.exists(tempDirPath)) {
+                Files.createDirectories(tempDirPath);
+            }
+
+            // The original filename is client-controlled: keep only its last path
+            // segment so values like "../../x" cannot escape the temp directory.
+            String safeFilename = stripDirectories(file.getOriginalFilename());
+            String fileExtension = getFileExtension(safeFilename);
+
+            // Build the temporary file path
+            String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"));
+            String tempFileName = timestamp + "_" + safeFilename;
+            tempFilePath = tempDirPath.resolve(tempFileName);
+
+            // Store the uploaded file
+            file.transferTo(tempFilePath.toFile());
+
+            // Parse the file content
+            Map<String, Object> parsedContent = parseFile(tempFilePath.toString(), fileExtension);
+
+            // Index into Elasticsearch. The embedding is computed inside
+            // indexSellList, so it is not requested a second time here.
+            String text = parsedContent.get("text").toString();
+
+            KnowsIndex knowsIndex = new KnowsIndex();
+            knowsIndex.setId(String.valueOf(IdUtil.getSnowflakeNextId()));
+            knowsIndex.setContent(text);
+            esDocumentService.indexSellList(Arrays.asList(knowsIndex));
+
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            // Remove the temporary file on success and on failure alike.
+            if (tempFilePath != null) {
+                try {
+                    Files.deleteIfExists(tempFilePath);
+                } catch (java.io.IOException cleanupError) {
+                    cleanupError.printStackTrace();
+                }
+            }
+        }
+    }
+
+    /** Returns the part of a filename after its last '/' or '\', or "upload" when nothing is left. */
+    private String stripDirectories(String filename) {
+        if (filename == null) {
+            return "upload";
+        }
+        String normalized = filename.replace('\\', '/');
+        String base = normalized.substring(normalized.lastIndexOf('/') + 1);
+        return base.isEmpty() ? "upload" : base;
+    }
+
+    /** Returns the lower-cased text after the last '.', or "" when there is none. */
+    private String getFileExtension(String filename) {
+        if (filename == null)
+            return "";
+        int lastDotIndex = filename.lastIndexOf('.');
+        return (lastDotIndex == -1) ? "" : filename.substring(lastDotIndex + 1).toLowerCase();
+    }
+
+    /** Concatenates the lines, each followed by a newline. */
+    private String joinLines(List<String> lines) {
+        StringBuilder joined = new StringBuilder();
+        for (String line : lines) {
+            joined.append(line).append("\n");
+        }
+        return joined.toString();
+    }
+
+    /**
+     * Extracts text and image count from the file and adds basic metadata.
+     *
+     * @param filePath  path of the stored file
+     * @param extension lower-cased file extension without the dot
+     * @return map with "text", "imageCount", "filename", "uploadTime" and "fileType"
+     * @throws IllegalArgumentException for unsupported extensions
+     */
+    private Map<String, Object> parseFile(String filePath, String extension) throws Exception {
+        Map<String, Object> content = new HashMap<>();
+
+        switch (extension) {
+            case "pdf":
+                PDFParser pdfParser = new PDFParser(filePath);
+                pdfParser.parse();
+
+                // Collect the parse results
+                List<String> texts = pdfParser.getExtractedText();
+                List<BufferedImage> images = pdfParser.getExtractedImages();
+
+                content.put("text", joinLines(texts));
+                content.put("imageCount", images.size());
+                break;
+
+            case "doc":   // WordProcessor.process() handles .doc as well as .docx
+            case "docx":
+                WordProcessor wordProcessor = new WordProcessor(filePath);
+                wordProcessor.process();
+
+                List<String> extractedText = wordProcessor.getExtractedText();
+                List<XWPFPicture> extractedImages = wordProcessor.getExtractedImages();
+
+                content.put("text", joinLines(extractedText));
+                content.put("imageCount", extractedImages.size());
+                break;
+
+            default:
+                throw new IllegalArgumentException("不支持的文件类型: " + extension);
+        }
+
+        // Metadata
+        content.put("filename", new File(filePath).getName());
+        content.put("uploadTime", LocalDateTime.now().toString());
+        content.put("fileType", extension);
+
+        return content;
+    }
+}
--- a/knows-java/src/main/resources/application.yml
+++ b/knows-java/src/main/resources/application.yml
@ -0,0 +1,32 @@
+# HTTP port of the Spring Boot application
+server:
+  port: 8899
+
+spring:
+  servlet:
+    multipart:
+      # Upload size limits for a single file and for the whole multipart request
+      max-file-size: 10MB
+      max-request-size: 10MB
+  main:
+    allow-bean-definition-overriding: true
+  application:
+    name: knows
+
+  elasticsearch:
+    uris: 172.16.100.47:9200
+#    username: elastic
+#    password: 123456
+
+# API keys below can be supplied through environment variables so that real secrets
+# never need to be committed; the value after the colon is the previous hard-coded
+# value, kept as the default so existing setups behave exactly as before.
+qwen:
+  api-key: ${QWEN_API_KEY:sk-**********************}
+  model: qwen-plus
+
+oll:
+  uri: http://172.16.90.4:11434/api/generate
+
+embedding:
+  uri: http://172.16.90.4:6009/v1/embed
+  api-key: ${EMBEDDING_API_KEY:sk-abcdefg1234567}
+
+re-rank:
+  uri: http://172.16.90.4:6010/v1/reRank
+  api-key: ${RE_RANK_API_KEY:sk-abcdefg1234567}
--- a/knows-java/src/main/resources/native/opencv_java4110.dll
+++ b/knows-java/src/main/resources/native/opencv_java4110.dll
--- a/knows-java/src/main/resources/ocr/chi_sim.traineddata
+++ b/knows-java/src/main/resources/ocr/chi_sim.traineddata
--- a/knows-java/src/main/resources/ocr/eng.traineddata
+++ b/knows-java/src/main/resources/ocr/eng.traineddata
--- a/knows-java/src/main/resources/ocr/osd.traineddata
+++ b/knows-java/src/main/resources/ocr/osd.traineddata
--- a/knows-java/src/test/java/cn/luckday/ApplicationTests.java
+++ b/knows-java/src/test/java/cn/luckday/ApplicationTests.java
@ -0,0 +1,24 @@
+package cn.luckday;
+
+import cn.luckday.service.EsDocumentService;
+import jakarta.annotation.Resource;
+import org.junit.jupiter.api.Test;
+import org.springframework.boot.test.context.SpringBootTest;
+
+import java.io.IOException;
+
+/**
+ * Tests that run against the full Spring Boot application context.
+ */
+@SpringBootTest
+class ApplicationTests {
+
+    /** Service under test, injected from the application context. */
+    @Resource
+    private EsDocumentService service;
+
+    /** Succeeds when the application context starts. */
+    @Test
+    void contextLoads() {
+        // Intentionally empty: context start-up is the whole check.
+    }
+
+    /**
+     * Invokes {@code createIndex()} on the injected service.
+     *
+     * @throws IOException propagated from {@code createIndex()}
+     */
+    @Test
+    void create() throws IOException {
+        service.createIndex();
+    }
+}
--- a/konws-python/embed/Dockerfile
+++ b/konws-python/embed/Dockerfile
@ -0,0 +1,18 @@
+# Use the official Python runtime as the base image
+FROM python:3.10
+
+# Set the working directory
+WORKDIR /app
+
+# Copy only the dependency manifest first so the pip layers below stay cached
+# until requirements.txt itself changes
+COPY requirements.txt /app/requirements.txt
+
+RUN pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
+# Install the packages the service needs
+RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+# Copy the rest of the build context into /app
+# (COPY rather than ADD: no archive extraction or URL fetching is wanted here)
+COPY . /app
+
+# Port the service listens on at runtime
+EXPOSE 6009
+
+# Start the FastAPI app defined in embed.py with uvicorn
+CMD ["uvicorn", "embed:app", "--host", "0.0.0.0", "--port", "6009"]
--- a/konws-python/embed/embed.py
+++ b/konws-python/embed/embed.py
@ -0,0 +1,76 @@
+import os
+from typing import List
+
+import numpy as np
+import uvicorn
+from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer, models
+
+# 环境变量传入
+sk_key = os.environ.get('sk-key', 'sk-aaabbbcccdddeeefffggghhhiiijjjkkk')
+
+# 创建一个FastAPI实例
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# 创建一个HTTPBearer实例
+security = HTTPBearer()
+# 加载预训练的 Transformer 模型
+transformer_model = models.Transformer('./m3e-large', cache_dir='./cache')
+
+# 创建 Mean Pooling 层
+pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(), pooling_mode='mean')
+
+# 构建 SentenceTransformer 模型
+model = SentenceTransformer(modules=[transformer_model, pooling_model])
+
+
+# Request body accepted by POST /v1/embed.
+class EmbeddingRequest(BaseModel):
+    # Texts to embed; the response carries one entry per text, in the same order.
+    input: List[str]
+
+
+# Response body returned by POST /v1/embed.
+class EmbeddingResponse(BaseModel):
+    # One {"embedding": [...], "index": i} dict per input text.
+    data: list
+    # Dimensionality of the returned embedding vectors.
+    dimension: int
+
+
+@app.post("/v1/embed", response_model=EmbeddingResponse)
+async def get_embed(request: EmbeddingRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
+    if credentials.credentials != sk_key:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid authorization code",
+        )
+
+    # 计算嵌入向量和tokens数量
+    embeddings = [model.encode(text) for text in request.input]
+    # 归一化处理
+    embeddings = [embedding / np.linalg.norm(embedding) for embedding in embeddings]
+    # 将numpy数组转换为列表
+    embeddings = [embedding.tolist() for embedding in embeddings]
+
+    response = {
+        "data": [
+            {
+                "embedding": embedding,
+                "index": index
+            } for index, embedding in enumerate(embeddings)
+        ],
+        "dimension": len(embeddings[0])
+    }
+
+    return response
+
+
+# When executed as a script, serve the app with uvicorn on all interfaces,
+# port 6009, using two worker processes.
+if __name__ == "__main__":
+    uvicorn.run("embed:app", host='0.0.0.0', port=6009, workers=2)
--- a/konws-python/embed/requirements.txt
+++ b/konws-python/embed/requirements.txt
@ -0,0 +1,10 @@
+# Web framework, request validation and ASGI server (pinned)
+fastapi==0.99.1
+pydantic==1.10.7
+sentence-transformers==3.3.1
+uvicorn==0.23.1
+numpy==1.24.4
+scipy==1.10.1
+scikit-learn==1.3.0
+# NOTE(review): the torch packages below are unpinned, so image builds are not
+# reproducible; consider pinning versions that match the deployment target.
+torchvision
+torchaudio
+torch
--- a/konws-python/rerank/Dockerfile
+++ b/konws-python/rerank/Dockerfile
@ -0,0 +1,18 @@
+# Use the official Python runtime as the base image
+FROM python:3.10
+
+# Set the working directory
+WORKDIR /app
+
+# Copy only the dependency manifest first so the pip layers below stay cached
+# until requirements.txt itself changes
+COPY requirements.txt /app/requirements.txt
+
+RUN pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
+# Install the packages the service needs
+RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+# Copy the rest of the build context into /app
+# (COPY rather than ADD: no archive extraction or URL fetching is wanted here)
+COPY . /app
+
+# Port the service listens on at runtime
+EXPOSE 6010
+
+# Start the FastAPI app defined in rerank.py with uvicorn
+CMD ["uvicorn", "rerank:app", "--host", "0.0.0.0", "--port", "6010"]
--- a/konws-python/rerank/requirements.txt
+++ b/konws-python/rerank/requirements.txt
@ -0,0 +1,12 @@
+# Web framework, request validation and ASGI server (pinned)
+fastapi==0.99.1
+pydantic==1.10.7
+uvicorn==0.23.1
+tiktoken==0.4.0
+numpy==1.24.4
+scipy==1.10.1
+scikit-learn==1.5.0
+# NOTE(review): the torch packages below are unpinned, so image builds are not
+# reproducible; consider pinning versions that match the deployment target.
+torchvision
+torchaudio
+torch
+BCEmbedding==0.1.5
+starlette~=0.27.0
--- a/konws-python/rerank/rerank.py
+++ b/konws-python/rerank/rerank.py
@ -0,0 +1,58 @@
+import os
+from typing import List
+import uvicorn
+from BCEmbedding import RerankerModel
+from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from pydantic import BaseModel
+from starlette.middleware.cors import CORSMiddleware
+
+# 环境变量传入
+sk_key = os.environ.get('sk-key', 'sk-aaabbbcccdddeeefffggghhhiiijjjkkk...')
+
+# 创建一个FastAPI实例
+app = FastAPI()
+
+# 添加CORS中间件
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # 允许所有来源
+    allow_credentials=True,
+    allow_methods=["*"],  # 允许所有方法
+    allow_headers=["*"],  # 允许所有头部
+)
+
+# 创建一个HTTPBearer实例
+security = HTTPBearer()
+
+# 初始化模型
+model = RerankerModel(model_name_or_path="./bce-reranker-base_v1")
+
+
+# Request body accepted by POST /v1/reRank.
+class ReRankRequest(BaseModel):
+    # Candidate passages to be reranked.
+    textList: List[str]
+    # Query the passages are ranked against.
+    query: str
+
+
+# Response body returned by POST /v1/reRank; mirrors the dict produced by model.rerank().
+class ReRankResponse(BaseModel):
+    # Passages in reranked order.
+    rerank_passages: List[str]
+    # Scores aligned with rerank_passages.
+    rerank_scores: List[float]
+    # Integer ids aligned with rerank_passages.
+    # presumably indices into the submitted passages; verify against BCEmbedding
+    rerank_ids: List[int]
+
+
+# 定义路由，处理rerank请求
+@app.post("/v1/reRank", response_model=ReRankResponse)
+async def get_embeddings(request: ReRankRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
+    if credentials.credentials != sk_key:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid authorization code",
+        )
+    query = request.query
+    passages = request.textList
+    return model.rerank(query, passages)
+
+
+# When executed as a script, serve the app with uvicorn on all interfaces,
+# port 6010, using two worker processes.
+if __name__ == "__main__":
+    uvicorn.run("rerank:app", host='0.0.0.0', port=6010, workers=2)
--- a/konws-web/chatbox.html
+++ b/konws-web/chatbox.html
@ -0,0 +1,296 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+  <head>
+    <meta charset="UTF-8" />
+    <title>DeepSeek 32B Chat</title>
+    <script src="js/marked.min.js"></script>
+    <link rel="stylesheet" href="css/main.css" />
+  </head>
+  <body>
+    <div id="chatBox">
+      <div class="messages-container"></div>
+      <div id="inputArea">
+        <input type="text" id="userInput" placeholder="输入消息..." />
+        <button onclick="sendMessage()" id="sendBtn">
+          <svg class="icon" viewBox="0 0 1057 1024" xmlns="http://www.w3.org/2000/svg" width="20" height="20">
+            <path
+              d="M891.904 825.782857L462.482286 693.613714l429.421714-495.396571-561.517714 495.469714L0.073143 561.590857 1057.133714 0.073143 891.904 825.782857zM462.482286 1024v-231.058286l132.096 65.828572-132.096 165.156571z"
+              fill="#ffffff"
+            ></path>
+          </svg>
+        </button>
+      </div>
+    </div>
+
+    <!-- 添加上传按钮 -->
+    <div class="upload-button-container">
+      <button onclick="showUploadDialog()" class="upload-btn">
+        <svg viewBox="0 0 1024 1024" xmlns="http://www.w3.org/2000/svg" width="20" height="20">
+          <path
+            d="M1024 736s-3.4048-10.24-10.24-20.5056l-150.1696-300.3392c-6.8352-10.24-20.48-20.5056-34.1248-20.5056H706.56c-13.6448 0-23.8848 10.24-23.8848 23.9104v40.96c0 13.6448 10.2144 23.9104 23.8848 23.9104h40.96c13.6448 0 27.2896 10.24 34.1248 20.48l105.8304 215.0656c6.8352 10.1888 0 20.4544-13.6704 20.4544H706.56c-13.6448 0-23.8848 10.24-23.8848 23.9104v122.9056c0 13.6448-10.24 23.9104-23.9104 23.9104H365.2352a23.3216 23.3216 0 0 1-23.8848-23.9104v-122.9056c0-13.6448-10.24-23.9104-23.9104-23.9104H146.7648c-13.6448 0-17.0752-10.24-13.6448-20.4544l109.2352-215.0656c6.8352-10.2144 20.48-20.48 34.1248-20.48h37.5552c13.6448 0 23.9104-10.24 23.9104-23.9104v-37.5552c0-13.6448-10.24-23.8848-23.9104-23.8848h-122.88c-13.6704 0-27.3152 10.2144-34.1248 20.48L10.24 715.4944c-6.8352 10.2656-10.24 20.5056-10.24 20.5056v235.4944c0 13.6448 10.24 23.9104 23.8848 23.9104h976.2048c13.6448 0 23.9104-10.24 23.9104-23.9104V736zM300.3648 292.2752h126.2848v358.4h170.6752v-358.4h133.12c13.6448 0 17.0752-6.8352 6.8352-17.0496l-211.6352-238.9504c-6.8352-10.2144-23.8848-10.2144-30.72 0l-204.8 238.9504c-6.8096 10.2144-3.4048 17.0496 10.24 17.0496z"
+            fill="#fff"
+          ></path>
+        </svg>
+      </button>
+    </div>
+
+    <!-- 文件上传弹窗 -->
+    <div id="uploadDialog" class="upload-dialog">
+      <div class="upload-dialog-content">
+        <span class="close-btn" onclick="closeUploadDialog()">&times;</span>
+        <h2>上传文件</h2>
+        <div class="upload-area" id="dropZone">
+          <input type="file" id="fileInput" style="display: none" onchange="handleFileSelect(event)" />
+          <div class="upload-placeholder" onclick="document.getElementById('fileInput').click()">
+            <i class="fas fa-cloud-upload-alt"></i>
+            <p>点击或拖拽文件到此处上传</p>
+            <p class="supported-formats">支持的格式: PDF, DOC, DOCX</p>
+          </div>
+        </div>
+        <div id="uploadProgress" class="upload-progress" style="display: none">
+          <div class="progress-bar">
+            <div class="progress-fill"></div>
+          </div>
+          <span class="progress-text">0%</span>
+        </div>
+        <div id="uploadStatus" class="upload-status"></div>
+      </div>
+    </div>
+
+    <script>
+      // Base URL of the backend that serves /knows/generate and /api/file/upload
+      const url = "http://localhost:8899";
+
+      // Content element of the bot message currently being streamed into; null when none is active
+      let currentBotMessage = null;
+
+      // 添加消息到聊天框
+      function addMessage(content, isUser = false) {
+        const messagesContainer = document.querySelector(".messages-container");
+        const messageDiv = document.createElement("div");
+        messageDiv.className = `message ${isUser ? "user-message" : "bot-message"}`;
+
+        // 创建头像元素
+        const avatar = document.createElement("img");
+        avatar.className = "avatar";
+        avatar.src = isUser ? "./images/user-avatar.png" : "/images/bot-avatar.png";
+        avatar.alt = isUser ? "User Avatar" : "Bot Avatar";
+
+        // 创建消息内容容器
+        const messageContent = document.createElement("div");
+        messageContent.className = "message-content";
+
+        if (isUser) {
+          messageContent.textContent = content;
+        } else {
+          messageContent.innerHTML = marked.parse(content);
+        }
+
+        // 组装消息元素
+        messageDiv.appendChild(avatar);
+        messageDiv.appendChild(messageContent);
+
+        messagesContainer.appendChild(messageDiv);
+        messagesContainer.scrollTop = messagesContainer.scrollHeight;
+        return messageDiv;
+      }
+
+      // 修改处理流式响应的部分
+      async function streamResponse(prompt) {
+        const btn = document.getElementById("sendBtn");
+        btn.disabled = true;
+        let accumulatedContent = "";
+
+        try {
+          const response = await fetch(url + "/knows/generate", {
+            method: "POST",
+            headers: {
+              "Content-Type": "application/json",
+              Accept: "text/event-stream"
+            },
+            body: JSON.stringify({
+              keyword: prompt
+            })
+          });
+
+          if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+          }
+
+          const reader = response.body.getReader();
+          const decoder = new TextDecoder();
+
+          if (!currentBotMessage) {
+            // 创建完整的消息结构，包括头像
+            const messageDiv = document.createElement("div");
+            messageDiv.className = "message bot-message";
+
+            // 创建头像元素
+            const avatar = document.createElement("img");
+            avatar.className = "avatar";
+            avatar.src = "./images/bot-avatar.png";
+            avatar.alt = "Bot Avatar";
+
+            // 创建消息内容容器
+            const messageContent = document.createElement("div");
+            messageContent.className = "message-content";
+
+            // 组装消息元素
+            messageDiv.appendChild(avatar);
+            messageDiv.appendChild(messageContent);
+
+            document.querySelector(".messages-container").appendChild(messageDiv);
+            currentBotMessage = messageContent; // 更新 currentBotMessage 为消息内容容器
+          }
+
+          let thinkContent = true;
+          while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+
+            const chunk = decoder.decode(value);
+            const lines = chunk.split("\n").filter((line) => line.trim().startsWith("data: "));
+
+            for (const line of lines) {
+              try {
+                // 移除 "data: " 前缀并解析JSON
+                const jsonData = JSON.parse(line.substring(6));
+
+                if (jsonData.response) {
+                  let content = jsonData.response;
+                  // if (content.includes("\u003c/think\u003e")) {
+                  //   thinkContent = false;
+                  // }
+
+                  // if (!thinkContent) {
+                  //   accumulatedContent += content;
+                  //   currentBotMessage.innerHTML = marked.parse(accumulatedContent);
+                  // }
+
+                  accumulatedContent += content;
+                  currentBotMessage.innerHTML = marked.parse(accumulatedContent);
+                }
+
+                if (jsonData.done) {
+                  currentBotMessage = null;
+                }
+              } catch (error) {
+                console.error("解析数据失败:", error, "原始数据:", line);
+                continue;
+              }
+            }
+
+            document.querySelector(".messages-container").scrollTop = document.querySelector(".messages-container").scrollHeight;
+          }
+        } catch (error) {
+          console.error("请求失败:", error);
+          addMessage(`[错误] ${error.message}`, false);
+        } finally {
+          btn.disabled = false;
+        }
+      }
+
+      // 发送消息
+      async function sendMessage() {
+        const input = document.getElementById("userInput");
+        const userMessage = input.value.trim();
+
+        if (!userMessage) return;
+
+        addMessage(userMessage, true);
+        input.value = "";
+
+        await streamResponse(userMessage);
+      }
+
+      // 回车键发送
+      document.getElementById("userInput").addEventListener("keypress", (e) => {
+        if (e.key === "Enter" && !e.shiftKey) {
+          e.preventDefault();
+          sendMessage();
+        }
+      });
+
+      function showUploadDialog() {
+        document.getElementById("uploadDialog").style.display = "block";
+      }
+
+      function closeUploadDialog() {
+        document.getElementById("uploadDialog").style.display = "none";
+        resetUploadDialog();
+      }
+
+      function resetUploadDialog() {
+        document.getElementById("fileInput").value = "";
+        document.getElementById("uploadProgress").style.display = "none";
+        document.getElementById("uploadStatus").innerHTML = "";
+        document.getElementById("uploadStatus").className = "upload-status";
+      }
+
+      function handleFileSelect(event) {
+        const file = event.target.files[0];
+        if (file) {
+          uploadFile(file);
+        }
+      }
+
+      function updateProgress(percent) {
+        const progressBar = document.querySelector(".progress-fill");
+        const progressText = document.querySelector(".progress-text");
+        progressBar.style.width = `${percent}%`;
+        progressText.textContent = `${percent}%`;
+      }
+
+      function uploadFile(file) {
+        const formData = new FormData();
+        formData.append("file", file);
+
+        const progressDiv = document.getElementById("uploadProgress");
+        const statusDiv = document.getElementById("uploadStatus");
+
+        progressDiv.style.display = "block";
+        statusDiv.innerHTML = "正在上传...";
+        statusDiv.className = "upload-status";
+
+        fetch(url + "/api/file/upload", {
+          method: "POST",
+          body: formData
+        })
+          .then((response) => response.json())
+          .then((data) => {
+            statusDiv.innerHTML = data.message;
+            statusDiv.className = "upload-status success";
+            updateProgress(100);
+            setTimeout(() => {
+              closeUploadDialog();
+            }, 2000);
+          })
+          .catch((error) => {
+            statusDiv.innerHTML = "上传失败: " + error.message;
+            statusDiv.className = "upload-status error";
+            updateProgress(0);
+          });
+      }
+
+      // 添加拖拽上传支持
+      const dropZone = document.getElementById("dropZone");
+
+      dropZone.addEventListener("dragover", (e) => {
+        e.preventDefault();
+        dropZone.style.borderColor = "#4CAF50";
+      });
+
+      dropZone.addEventListener("dragleave", (e) => {
+        e.preventDefault();
+        dropZone.style.borderColor = "#ccc";
+      });
+
+      dropZone.addEventListener("drop", (e) => {
+        e.preventDefault();
+        dropZone.style.borderColor = "#ccc";
+        const file = e.dataTransfer.files[0];
+        if (file) {
+          uploadFile(file);
+        }
+      });
+    </script>
+  </body>
+</html>
--- a/konws-web/css/main.css
+++ b/konws-web/css/main.css
@ -0,0 +1,337 @@
+body {
+  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+  max-width: 800px;
+  margin: 0 auto;
+  padding: 0;
+  background-color: #f5f5f5;
+  height: 100vh;
+  display: flex;
+  flex-direction: column;
+}
+
+/* Chat panel: a flex column that fills the remaining page height */
+#chatBox {
+  flex: 1;
+  background: #ededed;
+  padding: 20px;
+  overflow-y: auto;
+  margin: 50px;
+  border-radius: 18px;
+  position: relative;
+  display: flex;
+  flex-direction: column;
+  /* Extra bottom padding; presumably leaves room for the absolutely positioned #inputArea */
+  padding-bottom: 80px;
+  /* Hide the scrollbar in Firefox */
+  scrollbar-width: none;
+  box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+  /* Hide the scrollbar in IE and Edge */
+  -ms-overflow-style: none;
+}
+
+#chatBox::-webkit-scrollbar {
+  display: none;
+  /* Chrome, Safari, Opera */
+}
+
+.messages-container {
+  flex: 1;
+  overflow-y: auto;
+  padding: 20px;
+  scrollbar-width: none;
+  /* Firefox */
+  -ms-overflow-style: none;
+  /* IE and Edge */
+}
+
+.messages-container::-webkit-scrollbar {
+  display: none;
+  /* Chrome, Safari, Opera */
+}
+
+.message {
+  margin: 10px 0;
+  padding: 10px 15px;
+  border-radius: 4px;
+  max-width: 70%;
+  word-wrap: break-word;
+  position: relative;
+  line-height: 1.5;
+  font-size: 15px;
+  width: max-content;
+  display: flex;
+  align-items: flex-start;
+  gap: 10px;
+}
+
+.avatar {
+  width: 40px;
+  height: 40px;
+  border-radius: 50%;
+  flex-shrink: 0;
+}
+
+.message-content {
+  padding: 10px 15px;
+  border-radius: 15px;
+}
+
+.user-message {
+  width: max-content;
+  margin-left: auto;
+  flex-direction: row-reverse;
+}
+
+.user-message .message-content {
+  background: #95ec69;
+  border-radius: 15px 0 15px 15px;
+}
+
+.bot-message {
+  background: white;
+  margin-right: auto;
+  border-radius: 0 15px 15px 15px;
+}
+
+#inputArea {
+  position: absolute;
+  bottom: 20px;
+  left: 50%;
+  transform: translateX(-50%);
+  display: flex;
+  gap: 10px;
+  padding: 15px;
+  background: white;
+  border-radius: 20px;
+  box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+  width: calc(100% - 100px);
+  max-width: 600px;
+  box-sizing: border-box;
+}
+
+/* Text input inside the input area */
+#userInput {
+  flex-grow: 1;
+  padding: 8px 12px;
+  /* Borderless input. A preceding `border: 1px solid #ddd` was always overridden
+     by this declaration and has been dropped; rendering is unchanged. */
+  border: none;
+  border-radius: 4px;
+  font-size: 15px;
+  background: white;
+  outline: none;
+}
+
+#userInput:focus {
+  border-color: #07c160;
+}
+
+/* Round 40px icon button.
+   NOTE(review): this selector matches every <button> on the page, including the
+   absolute positioning below; consider scoping it to #sendBtn. */
+button {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  background: #07c160;
+  color: white;
+  border: none;
+  cursor: pointer;
+  transition: background 0.2s;
+  font-size: 15px;
+  /* Circle shape. An earlier `border-radius: 4px` in this rule was always
+     overridden by this declaration and has been dropped; rendering is unchanged. */
+  border-radius: 50%;
+  width: 40px;
+  height: 40px;
+  position: absolute;
+  right: 18px;
+}
+
+button:hover {
+  background: #06ae56;
+}
+
+button:disabled {
+  background: #9fd7b5;
+  cursor: not-allowed;
+}
+
+/* Markdown 样式优化 */
+.message pre {
+  background: #f8f9fa;
+  padding: 12px;
+  border-radius: 4px;
+  overflow-x: auto;
+  margin: 8px 0;
+  font-size: 14px;
+}
+
+.message code {
+  font-family: Menlo, Monaco, Consolas, "Courier New", monospace;
+  background: rgba(0, 0, 0, 0.05);
+  padding: 2px 4px;
+  border-radius: 3px;
+  font-size: 14px;
+}
+
+.message p {
+  margin: 0;
+}
+
+.message p+p {
+  margin-top: 8px;
+}
+
+/* Scrollbar styling for WebKit-based browsers.
+   NOTE(review): an earlier rule in this file sets
+   `#chatBox::-webkit-scrollbar { display: none; }`, so these rules appear to have
+   no visible effect; confirm whether the scrollbar should be hidden or styled. */
+#chatBox::-webkit-scrollbar {
+  width: 6px;
+}
+
+#chatBox::-webkit-scrollbar-track {
+  background: #f1f1f1;
+}
+
+#chatBox::-webkit-scrollbar-thumb {
+  background: #c1c1c1;
+  border-radius: 3px;
+}
+
+#chatBox::-webkit-scrollbar-thumb:hover {
+  background: #a8a8a8;
+}
+
+/* 适配移动端 */
+@media (max-width: 768px) {
+  body {
+    max-width: 100%;
+    height: 100vh;
+    padding: 0;
+  }
+
+  .message {
+    max-width: 85%;
+  }
+
+  #inputArea {
+    padding: 10px;
+  }
+}
+
+.upload-button-container {
+  position: fixed;
+  bottom: 20px;
+  right: 20px;
+  z-index: 1000;
+}
+
+.upload-btn {
+  background-color: #000;
+  color: white;
+  border: none;
+  border-radius: 50%;
+  cursor: pointer;
+  font-size: 16px;
+  transition: background-color 0.3s;
+  bottom: 50px;
+}
+
+.upload-btn:hover {
+  background-color: #1a1a1a;
+}
+
+.upload-dialog {
+  display: none;
+  position: fixed;
+  top: 0;
+  left: 0;
+  width: 100%;
+  height: 100%;
+  background-color: rgba(0, 0, 0, 0.5);
+  z-index: 1001;
+}
+
+.upload-dialog-content {
+  position: relative;
+  background-color: #fefefe;
+  margin: 15% auto;
+  padding: 20px;
+  border-radius: 5px;
+  width: 60%;
+  max-width: 500px;
+}
+
+.close-btn {
+  position: absolute;
+  right: 10px;
+  top: 5px;
+  font-size: 24px;
+  cursor: pointer;
+  color: #888;
+}
+
+.close-btn:hover {
+  color: #555;
+}
+
+.upload-area {
+  border: 2px dashed #ccc;
+  border-radius: 5px;
+  padding: 20px;
+  text-align: center;
+  margin: 20px 0;
+  cursor: pointer;
+  transition: border-color 0.3s;
+}
+
+.upload-area:hover {
+  border-color: #4caf50;
+}
+
+.upload-placeholder {
+  color: #666;
+}
+
+.upload-placeholder i {
+  font-size: 48px;
+  color: #4caf50;
+  margin-bottom: 10px;
+}
+
+.supported-formats {
+  font-size: 12px;
+  color: #888;
+  margin-top: 5px;
+}
+
+.upload-progress {
+  margin: 15px 0;
+}
+
+.progress-bar {
+  width: 100%;
+  height: 20px;
+  background-color: #f0f0f0;
+  border-radius: 10px;
+  overflow: hidden;
+}
+
+.progress-fill {
+  width: 0%;
+  height: 100%;
+  background-color: #4caf50;
+  transition: width 0.3s;
+}
+
+.progress-text {
+  display: block;
+  text-align: center;
+  margin-top: 5px;
+  color: #666;
+}
+
+.upload-status {
+  margin-top: 10px;
+  text-align: center;
+  color: #666;
+}
+
+.upload-status.success {
+  color: #4caf50;
+}
+
+.upload-status.error {
+  color: #f44336;
+}
--- a/konws-web/images/bot-avatar.png
+++ b/konws-web/images/bot-avatar.png
--- a/konws-web/images/user-avatar.png
+++ b/konws-web/images/user-avatar.png
--- a/konws-web/js/marked.min.js
+++ b/konws-web/js/marked.min.js