๐ก Apps Script๋ฅผ ์ด์ฉํด ์นํ์ด์ง๋ฅผ ๋ง๋ค๊ณ ,
์นํ์ด์ง์์ PDF๋ ์ด๋ฏธ์ง์ URL์ ์ ๋ ฅ ๋ฐ์ ๊ฒฐ๊ณผ๋ฅผ ์นํ์ด์ง์ ํ ์คํธ๋ก ๋ฟ๋ ค์ฃผ๋ ๊ธฐ๋ฅ์ ๊ตฌํํด๋ณธ๋ค.
์๋์ ๋๊ฐ์ง๋ฅผ ์์ฉํด๋ณธ๋ค.
Apps Script๋ก ์ด๋ฏธ์งํ์ผ/PDF ํ์ผ OCR ํ๊ธฐ
๐กApps Script๋ฅผ ์ด์ฉํด ์ด๋ฏธ์ง๋ pdfํ์ผ์์ ํ ์คํธ๋ฅผ ์ถ์ถํด๋ณธ๋ค. ์๋๋ฐฉ์์ ์ด๋ฏธ์ง๋ pdf ํ์ผ์ Google Drive์ OCR ๋ ํํ์ ์์ ํ์ผ๋ก upload ํ๋ ๋ฐฉ์์ด๋ค. Apps Script ์ข์ธก ๋ฉ๋ด์์ ์๋น์ค์ [
whiseung.tistory.com
Apps Script๋ก ๊ฐ๋จํ ์นํ์ด์ง ์์ฑํ๊ธฐ
Apps Script๋ก ๊ฐ๋จํ ์นํ์ด์ง๋ฅผ ์์ฑ ํ ์ ์๋ค. ์ด ์นํ์ด์ง์์ Bigquery, Database, Google sheets๋ก ๋ฐ์ดํฐ๋ฅผ ์ ๋ ฅ(์ ๋ ฅ)ํ ์ ์๋ค. Simple Trigger์ธ doGet()๋ฅผ ์ด์ฉํ๋ค. function doGet(e) { return HtmlService.createT
whiseung.tistory.com
Code.gs
/**
 * Web-app entry point for HTTP GET requests.
 *
 * Renders the `inputUrl.html` template. When the request carries an
 * `input_url` query parameter, the file behind that URL is passed to
 * `readTextFromFile` and the extracted text is exposed to the template as
 * `result`. The deployment URL from `getUrl` is exposed as `url` so the
 * form in the template can submit back to this script.
 *
 * @param {Object} e - Request event object; `e.parameter` holds the
 *     query-string values.
 * @returns {Object} The evaluated template (result of `evaluate()`).
 */
function doGet(e) {
  Logger.log(JSON.stringify(e));

  const template = HtmlService.createTemplateFromFile('inputUrl.html');
  template.url = getUrl();

  // Tolerate a missing event object, e.g. when the function is invoked
  // without a request (manual run).
  const inputUrl = e && e.parameter ? e.parameter['input_url'] : undefined;

  // Keep the OCR text in a local binding rather than an implicit global.
  const result = readTextFromFile(inputUrl);

  // Hand the template an empty string instead of `undefined` when nothing
  // was submitted.
  template.result = result === undefined ? '' : result;

  Logger.log(`Input Url = ${inputUrl}`);
  Logger.log(`Ocr result = ${result}`);

  return template.evaluate();
}
/**
 * Looks up the URL of this script's web-app service.
 *
 * @returns {string} Whatever `ScriptApp.getService().getUrl()` reports.
 */
function getUrl() {
  return ScriptApp.getService().getUrl();
}
/**
 * Downloads the file at `url`, uploads it to Drive with OCR enabled, and
 * returns the text of the resulting document.
 *
 * The uploaded document is only a temporary carrier for the OCR output and
 * is removed again before returning, including when reading it fails.
 *
 * NOTE(review): `Drive.Files.insert` with a `title` field looks like the
 * Drive API v2 shape of the advanced Drive service; confirm the enabled
 * service version.
 *
 * @param {string} url - Address of the image or PDF to read. A falsy value
 *     short-circuits the function.
 * @returns {string|undefined} Extracted text, or `undefined` when no URL
 *     was given.
 */
function readTextFromFile(url) {
  // Nothing to do on the initial page load, when no URL has been submitted.
  if (!url) {
    return;
  }

  const contentBlob = UrlFetchApp.fetch(url).getBlob();
  const resource = {
    title: contentBlob.getName(),
    mimeType: contentBlob.getContentType(),
  };
  const options = { ocr: true };

  const docFile = Drive.Files.insert(resource, contentBlob, options);
  try {
    return DocumentApp.openById(docFile.id).getBody().getText();
  } finally {
    // Always clean up the temporary document so failed reads do not leave
    // files behind in Drive.
    Drive.Files.remove(docFile.id);
  }
}
inputUrl.html
<!DOCTYPE html>
<html>
<head>
<base target="_top">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6" crossorigin="anonymous">
<!-- <script type="text/javascript">
alert("ddd");
</script -->
</head>
<body>
<form action="<?= url ?>" method="GET">
<div class="container">
<div class="row frame">
<h5 class="mt-4 text-center">URL to OCR</h5>
<h6 class="mb-4 text-center">์
๋ ฅํ URL ์ PDF, Image์ ๋ํ ๊ฒฐ๊ณผ๋ฅผ OCR๋ก ๋ณด์ฌ์ค</h6>
<!-- create form element here -->
<div class="form-group mb-4 box">
<input type="text" class="form-control inp mb-3" id="input_url" name="input_url" placeholder="์
๋ ฅ" autocomplete="off">
</div>
<!-- create form until element here -->
<sapn><?= result ?></sapn>
<div class="form-group mt-4 mb-4 text-center">
<input type="submit" class="btn btn-info" name="Submit" /><br>
</div>
</div>
</div>
</form>
<script src="https://cdn.jsdelivr.net/npm/@popperjs/core@2.9.1/dist/umd/popper.min.js" integrity="sha384-SR1sx49pcuLnqZUnnPwx6FCym0wLsk5JZuNx2bPPENzswTNFaQU1RDvt3wT4gWFG" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.min.js" integrity="sha384-j0CNLUeiqtyaRmlzUHCPZ+Gy5fQu0dQ6eZ/xAww941Ai1SxSY+0EQqNXNE6DZiVc" crossorigin="anonymous"></script>
</body>
</html>
이미지 URL
https://i.stack.imgur.com/i1Abv.png
PDF OCR
https://www.africau.edu/images/default/sample.pdf
https://script.google.com/macros/s/AKfycbzY4RxgEuLb4rWM5Kk681H1UJZXAuTvPnJ108NINF6vVy3nA0uPIKYzPU7VHoBBHBsNQg/exec
script.google.com