Azure
Form Recognizer로 Azure Blob Storage의 파일을 읽어 텍스트 추출해보기
whistory
2023. 6. 28. 13:47
반응형
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.storage.blob import ContainerClient
# Form Recognizer connection settings, read by analyze_read() below.
# NOTE(review): these look like placeholder values - replace them with the
# resource's real endpoint URL and API key (ideally loaded from the
# environment rather than committed to source).
endpoint = "FORM_RECOGNIZER_END_POINT"
key = "FORM_RECOGNIZER_KEY"
def get_blob_url():
    """Return the URL of the target PDF stored in the blob container.

    Connects to the container named by ``SOURCE_NAME`` using the connection
    string in ``STORAGE_CONSTR``, confirms that a blob called ``FILE_NAME``
    exists, prints its URL and returns it.

    Returns:
        The blob's URL as a string.

    Raises:
        FileNotFoundError: If the container holds no blob named ``FILE_NAME``.
            (Previously this case left ``formUrl`` unassigned.)
    """
    # NOTE(review): placeholder values - substitute real settings.
    STORAGE_CONSTR = "BLOB_STORAGE_CONNECTION_STRING"
    SOURCE_NAME = "BLOB_STORAGE_CONTAINER_NAME"
    FILE_NAME = "sample.pdf"

    # The context manager closes the client's transport when we are done.
    with ContainerClient.from_connection_string(
        conn_str=STORAGE_CONSTR,
        container_name=SOURCE_NAME,
    ) as container:
        # Address the blob by name instead of listing the whole container;
        # one existence check replaces a scan over every blob.
        blob_client = container.get_blob_client(FILE_NAME)
        if not blob_client.exists():
            raise FileNotFoundError(
                f"Blob {FILE_NAME!r} not found in container {SOURCE_NAME!r}"
            )
        # Let the SDK build the URL rather than concatenating strings by hand.
        formUrl = blob_client.url

    print(formUrl)
    return formUrl
def analyze_read():
    """Extract text from the blob-hosted document and print it.

    Resolves the document URL via ``get_blob_url()``, submits it to Form
    Recognizer using the ``prebuilt-read`` model, waits for the long-running
    operation to finish and prints the extracted content between start/end
    banners. Returns ``None``.

    NOTE(review): the service fetches the document from the URL itself, so
    the blob presumably has to be reachable by it (public access or a SAS
    token) - confirm for your storage account.
    """
    formUrl = get_blob_url()
    # "\n" (not "\\n") so the banner is followed by a blank line instead of
    # a literal backslash-n.
    print("========= Process start ==========\n")

    # The context manager releases the client's HTTP session on exit.
    with DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    ) as document_analysis_client:
        poller = document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-read", formUrl
        )
        # Blocks until the analysis completes.
        result = poller.result()

    print("Document contains content: ", result.content)
    print("========= Process end ==========\n")
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    analyze_read()
반응형