From 1979941475289e63da51e952e250de026842af7e Mon Sep 17 00:00:00 2001 From: jackTpy <824945044@qq.com> Date: Thu, 31 Jul 2025 10:49:47 +0800 Subject: [PATCH] =?UTF-8?q?feat(ocr):=20=E5=A2=9E=E5=8A=A0=20OCR=20?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=B9=B6=E7=BB=98=E5=88=B6=20Base64=20?= =?UTF-8?q?=E5=9B=BE=E5=83=8F=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 OcrCommonRecModel 接口中添加了 recognizeAndDrawToBase64 和 recognizeAndDraw 方法 - 在OcrCommonRecModelImpl 类中实现了这两个新方法 - 新增方法支持将识别结果以 Base64 格式返回,便于在网络中传输和使用 - 优化了现有 recognizeAndDraw 方法,使其支持 byte[] 图像数据 - 在 OcrInfo 类中添加了 base64Img 字段,用于存储识别结果的 Base64 图像 --- .../smartjavaai/ocr/config/OcrRecOptions.java | 1 + .../cn/smartjavaai/ocr/entity/OcrInfo.java | 1 + .../common/recognize/OcrCommonRecModel.java | 16 +++ .../recognize/OcrCommonRecModelImpl.java | 119 ++++++++++++------ 4 files changed, 96 insertions(+), 41 deletions(-) diff --git a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/config/OcrRecOptions.java b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/config/OcrRecOptions.java index 86e1b86..310e4a6 100644 --- a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/config/OcrRecOptions.java +++ b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/config/OcrRecOptions.java @@ -4,6 +4,7 @@ /** * OCR 识别配置 + * * @author dwj */ @Data diff --git a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/entity/OcrInfo.java b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/entity/OcrInfo.java index 9661fdc..e265c30 100644 --- a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/entity/OcrInfo.java +++ b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/entity/OcrInfo.java @@ -20,6 +20,7 @@ public class OcrInfo { private String fullText; + private String base64Img; public OcrInfo(List> lineList, String fullText) { diff --git a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModel.java b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModel.java index 8b4a4cd..adef8c6 100644 --- a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModel.java +++ b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModel.java @@ -98,7 +98,23 @@ default void recognizeAndDraw(String imagePath, String outputPath, int fontSize, default BufferedImage recognizeAndDraw(BufferedImage sourceImage, int fontSize, OcrRecOptions options){ throw new UnsupportedOperationException("默认不支持该功能"); } + /** + * 识别并绘制Base64结果 + * @param imageData 图片字节数组 + * @return + */ + default String recognizeAndDrawToBase64(byte[] imageData, int fontSize, OcrRecOptions options){ + throw new UnsupportedOperationException("默认不支持该功能"); + } + /** + * 识别并绘制结果 + * @param imageData 图片字节数组 + * @return + */ + default OcrInfo recognizeAndDraw(byte[] imageData, int fontSize, OcrRecOptions options){ + throw new UnsupportedOperationException("默认不支持该功能"); + } default List batchRecognize(List imageList, OcrRecOptions options) { throw new UnsupportedOperationException("默认不支持该功能"); diff --git a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModelImpl.java b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModelImpl.java index 7fd4806..aa59c6a 100644 --- a/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModelImpl.java +++ b/smartjavaai-ocr/src/main/java/cn/smartjavaai/ocr/model/common/recognize/OcrCommonRecModelImpl.java @@ -1,6 +1,5 @@ package cn.smartjavaai.ocr.model.common.recognize; -import ai.djl.Device; import ai.djl.MalformedModelException; import ai.djl.engine.Engine; import ai.djl.inference.Predictor; @@ -12,7 +11,7 @@ import ai.djl.repository.zoo.ModelNotFoundException; import ai.djl.repository.zoo.ModelZoo; import ai.djl.repository.zoo.ZooModel; -import cn.smartjavaai.common.enums.DeviceEnum; +import cn.hutool.core.img.ImgUtil; import cn.smartjavaai.common.pool.PredictorFactory; import cn.smartjavaai.common.utils.FileUtils; import cn.smartjavaai.common.utils.ImageUtils; @@ -28,7 +27,6 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.pool2.ObjectPool; import org.apache.commons.pool2.impl.GenericObjectPool; import org.opencv.core.Mat; @@ -43,6 +41,7 @@ /** * PPOCRV5 识别模型 + * * @author dwj */ @Slf4j @@ -59,8 +58,8 @@ public class OcrCommonRecModelImpl implements OcrCommonRecModel { private OcrCommonDetModel textDetModel; @Override - public void loadModel(OcrRecModelConfig config){ - if(StringUtils.isBlank(config.getRecModelPath())){ + public void loadModel(OcrRecModelConfig config) { + if (StringUtils.isBlank(config.getRecModelPath())) { throw new OcrException("recModelPath is null"); } this.config = config; @@ -68,11 +67,11 @@ public void loadModel(OcrRecModelConfig config){ this.textDetModel = config.getTextDetModel(); //初始化 识别Criteria Criteria recCriteria = OcrCommonRecCriterialFactory.createCriteria(config); - try{ + try { recognitionModel = ModelZoo.loadModel(recCriteria); this.recPredictorPool = new GenericObjectPool<>(new PredictorFactory<>(recognitionModel)); int predictorPoolSize = config.getPredictorPoolSize(); - if(config.getPredictorPoolSize() <= 0){ + if (config.getPredictorPoolSize() <= 0) { predictorPoolSize = Runtime.getRuntime().availableProcessors(); // 默认等于CPU核心数 } recPredictorPool.setMaxTotal(predictorPoolSize); @@ -88,10 +87,10 @@ public void loadModel(OcrRecModelConfig config){ @Override public OcrInfo recognize(String imagePath, OcrRecOptions options) { - if(StringUtils.isBlank(config.getRecModelPath())){ + if (StringUtils.isBlank(config.getRecModelPath())) { throw new OcrException("recModelPath为空,无法识别"); } - if(!FileUtils.isFileExists(imagePath)){ + if (!FileUtils.isFileExists(imagePath)) { throw new OcrException("图像文件不存在"); } Image img = null; @@ -101,14 +100,13 @@ public OcrInfo recognize(String imagePath, OcrRecOptions options) { } catch (IOException e) { throw new OcrException("无效的图片", e); } finally { - if(img != null){ - ((Mat)img.getWrappedImage()).release(); + if (img != null) { + ((Mat) img.getWrappedImage()).release(); } } } /** - * * @param image * @param options * @return @@ -116,7 +114,7 @@ public OcrInfo recognize(String imagePath, OcrRecOptions options) { @Override public OcrInfo recognize(Image image, OcrRecOptions options) { List result = batchRecognizeDJLImage(Collections.singletonList(image), options); - if(CollectionUtils.isEmpty(result)){ + if (CollectionUtils.isEmpty(result)) { throw new OcrException("OCR识别结果为空"); } return result.get(0); @@ -125,12 +123,13 @@ public OcrInfo recognize(Image image, OcrRecOptions options) { /** * 批量矫正文本框 + * * @param boxList * @param srcMat * @param manager * @return */ - private List batchAlign(List boxList, Mat srcMat,NDManager manager){ + private List batchAlign(List boxList, Mat srcMat, NDManager manager) { List imageList = new ArrayList<>(boxList.size()); for (int i = 0; i < boxList.size(); i++) { //透视变换 + 裁剪 @@ -149,12 +148,13 @@ private List batchAlign(List boxList, Mat srcMat,NDManager manage /** * 批量矫正文本框 + * * @param itemList * @param srcMat * @param manager * @return */ - private List batchAlignWithDirection(List itemList, Mat srcMat,NDManager manager){ + private List batchAlignWithDirection(List itemList, Mat srcMat, NDManager manager) { List imageList = new ArrayList<>(itemList.size()); for (OcrItem ocrItem : itemList) { //放射变换+裁剪 @@ -168,7 +168,6 @@ private List batchAlignWithDirection(List itemList, Mat srcMat,N } - // private RotatedBox recognize(OcrBox box,Mat srcMat,Predictor recPredictor,NDManager manager){ // try { // //透视变换 + 裁剪 @@ -192,11 +191,12 @@ private List batchAlignWithDirection(List itemList, Mat srcMat,N /** * 后处理:排序,分行 + * * @param rotatedBoxes */ - private OcrInfo postProcessOcrResult(List rotatedBoxes, OcrRecOptions ocrRecOptions){ + private OcrInfo postProcessOcrResult(List rotatedBoxes, OcrRecOptions ocrRecOptions) { //不分行 - if(!ocrRecOptions.isEnableLineSplit()){ + if (!ocrRecOptions.isEnableLineSplit()) { return OcrUtils.convertRotatedBoxesToOcrItems(rotatedBoxes); } //Y坐标升序排序 @@ -233,13 +233,13 @@ private OcrInfo postProcessOcrResult(List rotatedBoxes, OcrRecOption @Override public void recognizeAndDraw(String imagePath, String outputPath, int fontSize, OcrRecOptions options) { - if(!FileUtils.isFileExists(imagePath)){ + if (!FileUtils.isFileExists(imagePath)) { throw new OcrException("图像文件不存在"); } try { Image img = ImageFactory.getInstance().fromFile(Paths.get(imagePath)); OcrInfo ocrInfo = recognize(img, options); - if(Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()){ + if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) { throw new OcrException("未检测到文字"); } Mat wrappedImage = (Mat) img.getWrappedImage(); @@ -254,18 +254,18 @@ public void recognizeAndDraw(String imagePath, String outputPath, int fontSize, @Override public OcrInfo recognize(BufferedImage image, OcrRecOptions options) { - if(!ImageUtils.isImageValid(image)){ + if (!ImageUtils.isImageValid(image)) { throw new OcrException("图像无效"); } Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image)); OcrInfo ocrInfo = recognize(img, options); - ((Mat)img.getWrappedImage()).release(); + ((Mat) img.getWrappedImage()).release(); return ocrInfo; } @Override public OcrInfo recognize(byte[] imageData, OcrRecOptions options) { - if(Objects.isNull(imageData)){ + if (Objects.isNull(imageData)) { throw new OcrException("图像无效"); } try { @@ -278,12 +278,12 @@ public OcrInfo recognize(byte[] imageData, OcrRecOptions options) { @Override public BufferedImage recognizeAndDraw(BufferedImage sourceImage, int fontSize, OcrRecOptions options) { - if(!ImageUtils.isImageValid(sourceImage)){ + if (!ImageUtils.isImageValid(sourceImage)) { throw new OcrException("图像无效"); } Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(sourceImage)); OcrInfo ocrInfo = recognize(img, options); - if(Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()){ + if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) { throw new OcrException("未检测到文字"); } try { @@ -300,6 +300,43 @@ public BufferedImage recognizeAndDraw(BufferedImage sourceImage, int fontSize, O } } + @Override + public String recognizeAndDrawToBase64(byte[] imageData, int fontSize, OcrRecOptions options) { + if (Objects.isNull(imageData)) { + throw new OcrException("图像无效"); + } + OcrInfo ocrInfo = recognize(imageData, options); + if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) { + throw new OcrException("未检测到文字"); + } + try { + BufferedImage sourceImage = ImageIO.read(new ByteArrayInputStream(imageData)); + OcrUtils.drawRectWithText(sourceImage, ocrInfo, fontSize); + return ImgUtil.toBase64(sourceImage, "png"); + } catch (IOException e) { + throw new OcrException("导出图片失败", e); + } + } + + @Override + public OcrInfo recognizeAndDraw(byte[] imageData, int fontSize, OcrRecOptions options) { + if (Objects.isNull(imageData)) { + throw new OcrException("图像无效"); + } + OcrInfo ocrInfo = recognize(imageData, options); + if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) { + throw new OcrException("未检测到文字"); + } + try { + BufferedImage sourceImage = ImageIO.read(new ByteArrayInputStream(imageData)); + OcrUtils.drawRectWithText(sourceImage, ocrInfo, fontSize); + ocrInfo.setBase64Img(ImgUtil.toBase64(sourceImage, "png")); + return ocrInfo; + } catch (IOException e) { + throw new OcrException("导出图片失败", e); + } + } + @Override public List batchRecognize(List imageList, OcrRecOptions options) { List djlImageList = new ArrayList<>(imageList.size()); @@ -311,25 +348,25 @@ public List batchRecognize(List imageList, OcrRecOptions } catch (Exception e) { throw new OcrException(e); } finally { - djlImageList.forEach(image -> ((Mat)image.getWrappedImage()).release()); + djlImageList.forEach(image -> ((Mat) image.getWrappedImage()).release()); } } @Override public List batchRecognizeDJLImage(List imageList, OcrRecOptions options) { - if(Objects.isNull(textDetModel)){ + if (Objects.isNull(textDetModel)) { throw new OcrException("textDetModel is null"); } OcrRecOptions ocrRecOptions = options; - if(Objects.isNull(options)){ + if (Objects.isNull(options)) { ocrRecOptions = new OcrRecOptions(); } - if(CollectionUtils.isEmpty(imageList)){ + if (CollectionUtils.isEmpty(imageList)) { throw new OcrException("imageList is empty"); } //检测文本 List> boxeList = textDetModel.batchDetectDJLImage(imageList); - if(CollectionUtils.isEmpty(boxeList) || boxeList.size() != imageList.size()){ + if (CollectionUtils.isEmpty(boxeList) || boxeList.size() != imageList.size()) { throw new OcrException("未检测到文本"); } Predictor predictor = null; @@ -338,15 +375,15 @@ public List batchRecognizeDJLImage(List imageList, OcrRecOptions predictor = recPredictorPool.borrowObject(); List allImageAlignList = new ArrayList(); //检测方向 - if(ocrRecOptions.isEnableDirectionCorrect()){ - if(Objects.isNull(directionModel)){ + if (ocrRecOptions.isEnableDirectionCorrect()) { + if (Objects.isNull(directionModel)) { throw new OcrException("请配置方向模型"); } List matList = imageList.stream() - .map(image -> (Mat)image.getWrappedImage()) + .map(image -> (Mat) image.getWrappedImage()) .collect(Collectors.toList()); List> ocrItemList = directionModel.batchDetect(boxeList, matList); - if(CollectionUtils.isEmpty(ocrItemList) || ocrItemList.size() != imageList.size()){ + if (CollectionUtils.isEmpty(ocrItemList) || ocrItemList.size() != imageList.size()) { throw new OcrException("方向检测失败"); } allImageAlignList = new ArrayList(); @@ -358,7 +395,7 @@ public List batchRecognizeDJLImage(List imageList, OcrRecOptions // } allImageAlignList.addAll(imageAlignList); } - }else{ + } else { for (int i = 0; i < boxeList.size(); i++) { Mat srcMat = (Mat) imageList.get(i).getWrappedImage(); List imageAlignList = batchAlign(boxeList.get(i), srcMat, manager); @@ -372,8 +409,8 @@ public List batchRecognizeDJLImage(List imageList, OcrRecOptions int textIndex = 0; for (int i = 0; i < boxeList.size(); i++) { List rotatedBoxes = new ArrayList<>(); - for (int j = 0; j < boxeList.get(i).size(); j++){ - if(textIndex >= textList.size()){ + for (int j = 0; j < boxeList.get(i).size(); j++) { + if (textIndex >= textList.size()) { throw new OcrException("识别失败: 第" + i + "张图片, 第" + j + "个文本块,未识别到文本"); } OcrBox box = boxeList.get(i).get(j); @@ -387,7 +424,7 @@ public List batchRecognizeDJLImage(List imageList, OcrRecOptions return ocrInfoList; } catch (Exception e) { throw new OcrException("OCR检测错误", e); - }finally { + } finally { if (predictor != null) { try { recPredictorPool.returnObject(predictor); //归还 @@ -403,16 +440,16 @@ public List batchRecognizeDJLImage(List imageList, OcrRecOptions } } - private List batchRecognize(List imageAlignList){ + private List batchRecognize(List imageAlignList) { Predictor predictor = null; try { predictor = recPredictorPool.borrowObject(); List textList = predictor.batchPredict(imageAlignList); - imageAlignList.forEach(subImg -> ((Mat)subImg.getWrappedImage()).release()); + imageAlignList.forEach(subImg -> ((Mat) subImg.getWrappedImage()).release()); return textList; } catch (Exception e) { throw new OcrException("OCR检测错误", e); - }finally { + } finally { if (predictor != null) { try { recPredictorPool.returnObject(predictor); //归还