From bccfbb9870e0f05cbf7a3926ea1cd3c1398fd30a Mon Sep 17 00:00:00 2001 From: youngfreeFJS <471011042@qq.com> Date: Fri, 19 Jul 2024 11:47:26 +0800 Subject: [PATCH 1/2] feat: add image compress function to help llm api resp quickly. --- config.yaml | 3 ++- scripts/model.py | 9 +++++++-- scripts/utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index d0c1ad6..f7add51 100644 --- a/config.yaml +++ b/config.yaml @@ -16,4 +16,5 @@ ANDROID_XML_DIR: "/sdcard" # Set the directory on your Android device to store DOC_REFINE: false # Set this to true will make the agent refine existing documentation based on the latest demonstration; otherwise, the agent will not regenerate a new documentation for elements with the same resource ID. MAX_ROUNDS: 20 # Set the round limit for the agent to complete the task DARK_MODE: false # Set this to true if your app is in dark mode to enhance the element labeling -MIN_DIST: 30 # The minimum distance between elements to prevent overlapping during the labeling process \ No newline at end of file +MIN_DIST: 30 # The minimum distance between elements to prevent overlapping during the labeling process +SNAPSHOT_COMPRESS_MEGABYTE_SIZE: 0.5 # The expected size (Megabyte) of screenshot compression will be uploaded to LLM, and compressing the image will help the interface respond quickly. 
def get_image_megabyte_size(image_path: str) -> float:
    '''
    Return the on-disk size of a file in megabytes.

    One megabyte is taken as 1,000,000 bytes (decimal MB, not MiB).

    Args:
        image_path (str): path of the file to measure.

    Returns:
        float: file size in megabytes.

    Raises:
        OSError: if the file cannot be stat'ed (for example, it does not exist).
    '''
    return os.stat(image_path).st_size / 1_000_000


def compress_image_size(image_path: str, expect_megabyte: float) -> str:
    '''
    Re-encode an image as JPEG, lowering the quality until it fits a size budget.

    The image is written next to the original as ``<name>_compression.jpg``,
    starting at JPEG quality 95. After each write the resulting file size is
    compared with ``expect_megabyte``; if it is still too large the quality is
    reduced and the file is rewritten. The loop stops once the budget is met or
    the quality would drop to 10 or below, so the returned file is NOT
    guaranteed to be within the budget.

    Args:
        image_path (str): path of the original image.
        expect_megabyte (float): target size of the compressed file, in
            megabytes (fractions such as 0.5 are valid).

    Returns:
        str: path of the compressed JPEG file.

    Raises:
        FileNotFoundError: if ``image_path`` does not point to an existing file.
        ValueError: if the file exists but cannot be decoded as an image.
        OSError: if the compressed file cannot be written.
    '''
    # Fail early with a clear error; otherwise cv2.imread silently returns None
    # and the failure only surfaces later inside cv2.imwrite.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f'image not found: {image_path}')

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f'unable to decode image: {image_path}')

    compressed_image_path = os.path.splitext(image_path)[0] + '_compression.jpg'

    # Files at or above this size (MB) are far from any sensible budget, so the
    # quality is lowered in bigger steps to converge faster.
    coarse_step_threshold_mb = 6.5

    quality = 95
    while quality > 10:
        written = cv2.imwrite(
            compressed_image_path, image, [cv2.IMWRITE_JPEG_QUALITY, quality]
        )
        if not written:
            raise OSError(f'unable to write compressed image: {compressed_image_path}')

        # Stat the output once per iteration and reuse the value.
        current_megabyte_size = get_image_megabyte_size(compressed_image_path)
        print_with_color(f'compress image size to: {current_megabyte_size} MB.')
        if current_megabyte_size <= expect_megabyte:
            break
        quality -= 10 if current_megabyte_size >= coarse_step_threshold_mb else 5

    return compressed_image_path
\ No newline at end of file diff --git a/scripts/model.py b/scripts/model.py index ba23c16..a4240f6 100644 --- a/scripts/model.py +++ b/scripts/model.py @@ -8,7 +8,8 @@ from config import load_config configs = load_config() -DEFAULT_SNAPSHOT_MEGABYTES = configs["SNAPSHOT_COMPRESS_MEGABYTE_SIZE"] +DEFAULT_SNAPSHOT_MEGABYTES: int = configs["SNAPSHOT_COMPRESS_MEGABYTE_SIZE"] +USE_SNAPSHOT_COMPRESS: bool = configs["USE_SNAPSHOT_COMPRESS"] from utils import print_with_color, encode_image, compress_image_size @@ -40,7 +41,10 @@ def get_model_response(self, prompt: str, images: List[str]) -> (bool, str): } ] for img in images: - base64_img = encode_image(compress_image_size(img, DEFAULT_SNAPSHOT_MEGABYTES)) + if USE_SNAPSHOT_COMPRESS: + base64_img = encode_image(compress_image_size(img, DEFAULT_SNAPSHOT_MEGABYTES)) + else: + base64_img = encode_image(img) content.append({ "type": "image_url", "image_url": { diff --git a/scripts/utils.py b/scripts/utils.py index 01aaa6d..1104ef4 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -119,6 +119,18 @@ def compress_image_size(image_path: str, expect_megabyte: int) -> str: Returns: str: compressed image path. + + Example: + + ``` + ls -al '/Users/.../github/appAgentFork/AppAgent/apps/X/demos/self_explore_2024-07-19_11-49-26' total 8440 + drwxr-xr-x@ 6 youngfreefjs staff 192 7 19 11:49 . + drwxr-xr-x@ 4 youngfreefjs staff 128 7 19 11:50 .. + -rw-r--r--@ 1 youngfreefjs staff 92927 7 19 11:49 1.xml + -rw-r--r--@ 1 youngfreefjs staff 1703275 7 19 11:49 1_before.png + -rw-r--r--@ 1 youngfreefjs staff 1995296 7 19 11:49 1_before_labeled.png + -rw-r--r--@ 1 youngfreefjs staff 459612 7 19 11:50 1_before_labeled_compression.jpg + ``` ''' quality: int = 95