解析器合约示例代码
- Updated: 2025/04/18
作为 CoE 负责人,您必须为解析器 package 提供输入和输出合约,以便将其与 Document Automation 集成。
输入和输出合约的示例代码如下所示:
输入合约
package aws; import com.automationanywhere.botcommand.data.impl.StringValue; import com.automationanywhere.commandsdk.annotations.*; import com.automationanywhere.commandsdk.annotations.rules.LocalFile; import com.automationanywhere.commandsdk.annotations.rules.NotEmpty; import com.automationanywhere.commandsdk.model.AttributeType; import com.automationanywhere.core.security.SecureString; import static com.automationanywhere.commandsdk.model.DataType.STRING; @BotCommand @CommandPkg( name = "ExtractionCommand", label = "Extraction Command", description = "Extraction Command", node_label = "Extraction Command", return_type = STRING, return_label = "Extraction Command Response", minimum_botagent_version = "21.98", minimum_controlroom_version = "10520") public class ExtractionCommand { @Execute public StringValue compute( @Idx(index = "1", type = AttributeType.FILE) @LocalFile @Pkg(label = "Image File Path") @NotEmpty final String inputFilePath, @Idx(index = "2", type = AttributeType.CREDENTIAL) @Pkg(label = "Service Account") final SecureString serviceAccount) { String secureString = serviceAccount.getInsecureString(); return new StringValue(""); } }
输出合约
命令响应必须是 JSON 格式,并具有 Document Automation 支持的适当模式。以下是由该命令生成的 JSON 输出的模式:
{ "metadata": { "documentId":“用于在 DA 中识别文档的唯一 ID”,"filepath":“输入文件路径是指 Bot Runner 在执行期间使用的路径”,"executionStatus": { "statusCode":“指示提取后响应的状态代码”,"statusMessage":“指示提取后响应的状态消息”,"message":“详细说明结果的消息” },"timeInMs":“处理整个文档所需的时间(毫秒)”,"clusterId":“用于捕获启发式反馈的唯一 ID”,"numberOfPages":“文档中的页数” },"imagePreprocessingResult": { "metadata": { "documentId":“用于识别 DA 中文档的唯一 ID”,"filepath":“输入文件路径是指 Bot Runner 在执行期间使用的路径”,"executionStatus": { "statusCode":“指示提取后响应的状态代码”,"statusMessage":“指示提取后响应的状态消息”,"message":“详细说明结果的消息” },"timeInMs":“处理整个文档所需的时间(毫秒)”},"pages": [ { "filepath": “指在将文档拆分为页面后指向文档中页面的文件路径”,"deskew": true,"orientation": true,"renderDpi": 0,"width":“页面的宽度”,"height":“页面的高度” } ] }, "ocrResult": { "metadata": { "executionStatus": { "statusCode":“指示提取后响应的状态代码”,"statusMessage":“指示提取后响应的状态消息”,"message":“详细说明结果的消息” },"numberOfPages": “文档中的页数”,"learningInstanceSetting": { "provider":“提供商的名称”,"version":“提供商版本”,"langCodes": [“DA 中的语言代码” ] },"timeInMs":“获取文档 OCR 结果所需的时间(毫秒)”,"pages": [ { "id": “用于识别文档中页面的唯一 ID”,"pageNum":“文档中的页码”,"filepath":“指在将文档拆分为页面后指向文档中页面的文件路径”,"langCode":“指示 DA 中语言的代码” } ] },"blocks": [ { "id": “单词段的块 ID”,"geometry": { "x1":“边框矩形的 x1 轴”,"y1":“边框矩形的 y1 轴”,"x2":“边框矩形的 x2 轴”,"y2":“边框矩形的 y2 轴”},"text":“段中文本”,"blockType":“块的类型。可能的选项:WORD/LINE/INFO_BLOCK/KEY_VAL_BLOCK/TABLE/TABLE_HEADER/TABLE_HEADER_ELEM/COLUMN/KEY_INFO_BLOCK/NO_OBJECT”,"pageNum":“文档中的页码”,"confidence":“OCR 置信度” } ] },"docDetectResult": { "metadata": { "executionStatus": { "statusCode":“指示提取后响应的状态代码”,"statusMessage":“指示提取后响应的状态消息”,"message":“详细说明结果的消息”}, "timeInMs":“处理整个文档所花费的时间(毫秒)” },"featureObjects": [ { "id":“功能对象 UUID”,"blockType":“INFO_BLOCK”,"geometry": { "x1":“矩形边框的 x1 轴”,"y1":“矩形边框的 y1 轴”,"x2":“矩形边框的 x2 轴”,"y2":“矩形边框的 y2 轴” },"text":“功能对象文本”,"confidence":“置信度”,"ocrConfidence":“OCR 置信度”,"pageNum":“文档中的页码” } ] },"extractionResult": { "metadata": { "filepath":“输入文件路径是指 Bot Runner 在执行期间使用的路径”,"executionStatus": { 
"statusCode":“指示提取后响应的状态代码”,"statusMessage":“指示提取后响应的状态消息”,"message":“详细说明结果的消息”},"timeInMs":“处理整个文档所花费的时间(毫秒)”,"pages": [ { "id":“用于识别文档中页面的唯一 ID”,"pageNum":“文档中的页码”,"filepath":“指向文档中页面的文件路径(在将文档拆分为页面后)”,"width":“页面的宽度”,"height":“页面的高度”,"langCode":“指代 DA 中语言的代码”} ] },"keyValueFeatures": [ { "id":“用于识别文档中找到的键值的唯一 ID”,"domainFieldKey":“需要提取的字段”,"geometry": { "x1":“矩形边框的 x1 轴”,"y1":“矩形边框的 y1 轴”,"x2":“矩形边框的 x2 轴”,"y2":“矩形边框的 y2 轴”},"text":“段中文本”,"pageNum":“文档中的页码”,"ocrConfidence": 0.909,"extractionScore":“提取分数”,"key": { "id":“用于识别文档中找到的键的唯一 ID”,"text":“段中文本”,"geometry": { "x1":“矩形边框的 x1 轴”,"y1":“矩形边框的 y1 轴”,"x2":“矩形边框的 x2 轴”,"y2":“矩形边框的 y2 轴”},"extractionScore":“提取分数”},"value": { "id":“用于识别文档中找到的值的唯一 ID”,"text":“段中文本”,"geometry": { "x1":“矩形边框的 x1 轴”,"y1":“矩形边框的 y1 轴”,"x2":“矩形边框的 x2 轴”,"y2":“矩形边框的 y2 轴”},"extractionScore":“提取分数”},"extractedDataType":“提取字段的数据类型”} ],"tableFeatures": [ { "id":“用于识别文档中表格的唯一 ID”,"headers": [ { "id":“用于识别文档中标题列的唯一 ID”,"domainFieldKey":“需要提取的标题字段”,"geometry": { "x1":“矩形边框的 x1 轴”,"y1":“矩形边框的 y1 轴”,"x2":“矩形边框的 x2 轴”,"y2":“矩形边框的 y2 轴”},"text":“段中文本”,"pageNum":“文档中的页码”,"ocrConfidence":“OCR 置信度”,"extractionScore":“提取分数”} ],"rows": [ { "id":“用于识别表格中行的唯一 ID”,"geometry": { "x1":“矩形边框的 x1 轴”,"y1":“矩形边框的 y1 轴”,"x2":“矩形边框的 x2 轴”,"y2":“矩形边框的 y2 轴”},"cells": [ { "id":“用于识别行中单元格的唯一 ID”,"geometry": { "x1":“矩形边框的 x1 轴”,"y1":“矩形边框的 y1 轴”,"x2":“矩形边框的 x2 轴”,"y2":“矩形边框的 y2 轴”},"domainFieldKey":“需要提取的列字段”,"text":“段中文本”,"pageNum":“文档中的页码”,"extractedDataType":“文本”,"ocrConfidence":“OCR 置信度”,"extractionScore":“提取分数”} ],"pageNum": 1 } ] } ] } }