Azure

Form Recognizer로 Azure Blob Storage의 파일을 읽어 텍스트 추출해보기

whistory 2023. 6. 28. 13:47
반응형

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.storage.blob import ContainerClient

# Form Recognizer connection settings read by analyze_read().
# NOTE(review): both values look like placeholders — presumably they must be
# replaced with the real resource endpoint URL and API key before running.
endpoint = "FORM_RECOGNIZER_END_POINT"
key = "FORM_RECOGNIZER_KEY"

def get_blob_url(
    connection_string="BLOB_STORAGE_CONNECTION_STRING",
    container_name="BLOB_STORAGE_CONTAINER_NAME",
    file_name="sample.pdf",
):
    """Return the URL of a blob stored in an Azure Blob Storage container.

    The defaults reproduce the values that were previously hard-coded in
    this function, so calling ``get_blob_url()`` with no arguments behaves
    as before for a blob that exists.

    Args:
        connection_string: Storage account connection string.
        container_name: Name of the container that holds the blob.
        file_name: Exact name of the blob to look up.

    Returns:
        The blob's URL as a string. The URL is also printed to stdout.

    Raises:
        FileNotFoundError: If no blob named ``file_name`` exists in the
            container. (Previously this case crashed with an
            ``UnboundLocalError`` because the URL variable was never set.)
    """
    container = ContainerClient.from_connection_string(
        conn_str=connection_string,
        container_name=container_name,
    )

    # Close the client's underlying transport once the lookup is finished.
    with container:
        # Ask for the one blob we care about instead of listing the whole
        # container and comparing names one by one.
        blob_client = container.get_blob_client(file_name)
        if not blob_client.exists():
            raise FileNotFoundError(
                f"Blob {file_name!r} not found in container {container_name!r}"
            )
        # Let the SDK build the URL: unlike manual "container_url/name"
        # concatenation, it URL-encodes the blob name and keeps any SAS
        # query string in the right place.
        form_url = blob_client.url

    print(form_url)
    return form_url

def analyze_read():
    """Extract text from the blob returned by get_blob_url() and print it.

    Resolves the document URL via ``get_blob_url()``, submits it to Form
    Recognizer using the ``"prebuilt-read"`` model, waits for the analysis
    to finish, and prints the extracted content between start/end banners.

    Uses the module-level ``endpoint`` and ``key`` values to authenticate.
    Returns nothing; all output goes to stdout.
    """
    formUrl = get_blob_url()

    # "\n" (not "\\n"): emit a real blank line after the banner instead of
    # printing a literal backslash-n.
    print("========= Process start ==========\n")

    # Use the client as a context manager so its HTTP session is closed
    # even if the analysis raises.
    with DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    ) as document_analysis_client:
        poller = document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-read", formUrl
        )
        # Blocks until the long-running analysis operation completes.
        result = poller.result()

    print("Document contains content: ", result.content)
    print("========= Process end  ==========\n")

# Script entry point: run the analysis only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    analyze_read()

 

 

 

 

 

 

 

 

 

 

 

반응형