본문 바로가기

졸업프로젝트/YOLO

First try

728x90

좀 이상적인 케이스긴한데 성공

import cv2
import numpy as np
import json
from darkflow.net.build import TFNet
import matplotlib.pyplot as plt
from collections import OrderedDict
import requests
import sys

# Load the trained tiny-YOLO network through darkflow.
# NOTE(review): paths are relative to the working directory — confirm layout.
options = {
    "model": "./cfg/my-tiny-yolo.cfg",
    "pbLoad": "./darkflow/built_graph/my-tiny-yolo.pb",
    "metaLoad": './darkflow/built_graph/my-tiny-yolo.meta',
    "threshold": 0.4,
}

tfnet = TFNet(options)



# Load four independent copies of the test image: one to draw all raw
# detections, one for the left column, one for the right column, and one
# kept clean as the crop source for OCR.
# BUG FIX: the original passed img2 into cvtColor when building img3 and
# img4, so those variables held img2's conversion instead of their own reads.
img = cv2.imread('./img/img2.jpg', cv2.IMREAD_COLOR)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img2 = cv2.imread('./img/img2.jpg', cv2.IMREAD_COLOR)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
img3 = cv2.imread('./img/img2.jpg', cv2.IMREAD_COLOR)
img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2RGB)
img4 = cv2.imread('./img/img2.jpg', cv2.IMREAD_COLOR)
img4 = cv2.cvtColor(img4, cv2.COLOR_BGR2RGB)

# One random RGB color per possible detection box.
colors = [tuple(255 * np.random.rand(3)) for _ in range(50)]

# Run detection once.
# BUG FIX: the original called return_predict twice on the same image,
# doubling inference time and discarding the first result.
results = tfnet.return_predict(img)

# Draw every raw detection on the first image copy and echo it to stdout.
for color, result in zip(colors, results):
    top_left = (result['topleft']['x'], result['topleft']['y'])
    bottom_right = (result['bottomright']['x'], result['bottomright']['y'])
    caption = result['label']

    img = cv2.rectangle(img, top_left, bottom_right, color, 7)
    img = cv2.putText(img, caption, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
    print(result)

# Visualize the unprocessed detection overlay.
cv2.imshow("No processing", img)
k = cv2.waitKey(0)
cv2.destroyWindow("No processing")


# Sort boxes left-to-right, split them into the page's two columns, then
# order each column top-to-bottom so boxes follow the question numbering.
by_x = sorted(results, key=lambda result: result['bottomright']['x'])
half = img.shape[1]  # full image width; the column boundary is width / 2
print(half)

left = [result for result in by_x if result['bottomright']['x'] < half / 2]
right = [result for result in by_x if result['bottomright']['x'] >= half / 2]

left = sorted(left, key=lambda result: result['bottomright']['y'])
right = sorted(right, key=lambda result: result['bottomright']['y'])

print("LEFT =============================")
print(left)
print("RIGHT =============================")
print(right)



# Fresh palette, then visualize only the left-column detections on img2.
colors = [tuple(255 * np.random.rand(3)) for _ in range(50)]

for color, box in zip(colors, left):
    top_left = (box['topleft']['x'], box['topleft']['y'])
    bottom_right = (box['bottomright']['x'], box['bottomright']['y'])

    img2 = cv2.rectangle(img2, top_left, bottom_right, color, 7)
    img2 = cv2.putText(img2, box['label'], top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
    print(box)

cv2.imshow("LEFT", img2)
k = cv2.waitKey(0)
cv2.destroyWindow("LEFT")



# Fresh palette, then visualize only the right-column detections on img3.
colors = [tuple(255 * np.random.rand(3)) for _ in range(50)]

for color, box in zip(colors, right):
    top_left = (box['topleft']['x'], box['topleft']['y'])
    bottom_right = (box['bottomright']['x'], box['bottomright']['y'])

    img3 = cv2.rectangle(img3, top_left, bottom_right, color, 7)
    img3 = cv2.putText(img3, box['label'], top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

cv2.imshow("RIGHT", img3)
k = cv2.waitKey(0)
cv2.destroyWindow("RIGHT")
    
# Merge the two columns into a single reading-order list.
left.extend(right)

result_fix3 = left

print(result_fix3)


def _box_diag(box):
    """Diagonal length of a detection box — used as its size measure."""
    return ((box['bottomright']['x'] - box['topleft']['x']) ** 2
            + (box['topleft']['y'] - box['bottomright']['y']) ** 2) ** 0.5


# De-duplicate consecutive answer-type boxes: when two adjacent boxes share
# one of these labels, keep only the larger one.
# BUG FIX: the original referenced the undefined name `result_fix` (NameError
# whenever the smaller-current branch ran) and deleted from result_fix3 while
# iterating it, which shifts indices and skips elements. Building a new list
# avoids both problems.
_DEDUP_LABELS = ('spn', 'short_ans', 'page_num')

deduped = []
for result in result_fix3:
    if (deduped
            and result['label'] in _DEDUP_LABELS
            and result['label'] == deduped[-1]['label']):
        if _box_diag(result) < _box_diag(deduped[-1]):
            # Current box is the smaller duplicate — drop it.
            print("delete index : ")
            print(result)
            continue
        # Previous box is the smaller duplicate — replace it.
        print("delete index : ")
        print(deduped[-1])
        deduped[-1] = result
    else:
        deduped.append(result)

result_fix3 = deduped
    

## Run OCR on the cropped images (Kakao vision REST API).

# Kakao OCR request limits: max image side length in pixels,
# max payload size in bytes, max number of text boxes returned.
LIMIT_PX = 1024
LIMIT_BYTE = 1024*1024
LIMIT_BOX = 40

# SECURITY NOTE(review): hard-coded REST API key committed to source —
# move it to an environment variable / secret store and revoke this key.
rest_api_key = '7bb64904f647afee944a64c5c4126c44'


def kakao_ocr_resize(image_path: str):
    """Downscale an image so its longest side fits the Kakao OCR pixel limit.

    NOTE(review): despite the name and annotation, the argument is an
    already-loaded image array, not a file path (see read_ocr's caller).

    Returns the resized image array, or None when no resize was needed.

    BUG FIX: the original built a filename by formatting the image *array*
    into "{}_resized.jpg", wrote the file there, and returned that bogus
    path string; read_ocr then handed the string to kakao_ocr, whose
    cv2.imencode expects an image. Returning the resized array keeps the
    whole pipeline working on in-memory images. (Also removed the original
    L180 double assignment `height, width, _ = height, width, _ = ...`.)
    """
    image = image_path
    height, width, _ = image.shape
    print(f"height : {height}, width : {width}")

    if LIMIT_PX < height or LIMIT_PX < width:
        # Scale uniformly so the longer side becomes exactly LIMIT_PX.
        ratio = float(LIMIT_PX) / max(height, width)
        image = cv2.resize(image, None, fx=ratio, fy=ratio)
        return image
    return None


def kakao_ocr(image_path: str, appkey: str):
    """POST one image to the Kakao OCR endpoint and return the raw response.

    NOTE(review): `image_path` actually holds an image array (see read_ocr);
    it is JPEG-encoded in memory before upload — confirm against callers.
    """
    API_URL = 'https://dapi.kakao.com/v2/vision/text/ocr'
    headers = {'Authorization': 'KakaoAK {}'.format(appkey)}

    # Encode to JPEG in memory; [1] is the encoded buffer.
    payload = cv2.imencode(".jpg", image_path)[1].tobytes()

    return requests.post(API_URL, headers=headers, files={"image": payload})

def read_ocr(image):
    """OCR one cropped image and return the first recognized word.

    Resizes the image down to the API's pixel limit when necessary,
    posts it to the Kakao OCR service, and pulls the first entry of
    the first result's recognition_words list out of the JSON reply.
    """
    image_path = image
    resize_impath = kakao_ocr_resize(image_path)
    print(resize_impath)

    if resize_impath is not None:
        image_path = resize_impath
        print("using resized image")

    output = kakao_ocr(image_path, rest_api_key).json()
    return output['result'][0]['recognition_words'][0]
    
# Crop each answer-type box (with a 20px margin) out of the clean copy img4,
# OCR it, and attach the recognized text to the detection dict in place.
# (Removed the unused `index` counter from the original.)
for result in result_fix3:
    t1 = (result['topleft']['x'], result['topleft']['y'])
    br = (result['bottomright']['x'], result['bottomright']['y'])
    xa, ya = t1
    xb, yb = br
    if result['label'] in ('spn', 'short_ans', 'page_num'):
        # BUG FIX: clamp the padded crop at 0 — a negative start index
        # silently wraps to the far side of the array in numpy slicing.
        y0 = max(ya - 20, 0)
        x0 = max(xa - 20, 0)
        cropped_img = img4[y0:yb + 20, x0:xb + 20]
        cv2.imshow("img20", cropped_img)
        k = cv2.waitKey(0)
        # BUG FIX: the window was opened as "img20" but destroyed as "img2".
        cv2.destroyWindow("img20")
        result['recognition_words'] = read_ocr(cropped_img)
    else:
        result['recognition_words'] = 'null'

    print(result)


print("===============================")
print(result_fix3)

 

728x90