Skip to content
This repository was archived by the owner on Sep 26, 2022. It is now read-only.

Commit 78cfab1

Browse files
#55: Added method to list files in bucket (#60)
* Added listing method * Added test for listing method * Added documentation * Updated type hints
1 parent 574ca33 commit 78cfab1

File tree

9 files changed

+183
-44
lines changed

9 files changed

+183
-44
lines changed

doc/changes/changes_0.2.0.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ Code name: t.b.d
66

77
## Features / Enhancements
88

9+
- #55: Added method to list files in bucket
10+
911
## Bug Fixes
1012

1113
- #54: Removed PosixPath conversion from alter session string
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from pathlib import Path
2+
from exasol_bucketfs_utils_python import upload, list_files
3+
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
4+
from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig
5+
from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig
6+
7+
# Describe how to reach the BucketFS service and which bucket to work on.
connection_config = BucketFSConnectionConfig(
    host="localhost", port=6666,
    user="w", pwd="write",
    is_https=False)
bucketfs_config = BucketFSConfig(
    connection_config=connection_config,
    bucketfs_name="bfsdefault")
bucket_config = BucketConfig(
    bucket_name="default",
    bucketfs_config=bucketfs_config)

# Upload a file first, so that the listing below has something to return.
local_input_file_path = Path("local_input_file.txt")
path_in_bucket = "path/in/bucket/file.txt"
upload.upload_file_to_bucketfs(
    bucket_config=bucket_config,
    bucket_file_path=path_in_bucket,
    local_file_path=local_input_file_path)

# List everything below the directory that contains the uploaded file.
# The returned paths are relative to that directory, e.g. "file.txt".
bucket_file_path = "path/in/bucket"
files = list_files.list_files_in_bucketfs(
    bucket_config=bucket_config,
    bucket_file_path=bucket_file_path)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
#####################################
3+
Listing files in bucket
4+
#####################################
5+
6+
This library provides a function to list the files in the bucket under a given
7+
path. As in the example below, the list of files in the specified bucket
8+
directory is obtained by the provided listing method.
9+
10+
11+
Example:
12+
13+
.. literalinclude:: list_files_in_bucket.py
14+
:language: python3

doc/user_guide/user_guide.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ For a detailed explanation of the API, please refer to our :doc:`API Documentati
1010

1111
upload_download_functions
1212
upload_github_release_to_bucket
13+
list_files_in_bucket
1314

exasol_bucketfs_utils_python/abstract_bucketfs_location.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from typing import Any, Tuple, IO
2+
from typing import Any, Tuple, IO, Iterable
33
from pathlib import PurePosixPath, Path
44
from urllib.parse import ParseResult
55

@@ -65,3 +65,8 @@ def read_file_from_bucketfs_to_fileobj(self,
6565
def read_file_from_bucketfs_via_joblib(self,
6666
bucket_file_path: str) -> Any:
6767
pass
68+
69+
@abstractmethod
def list_files_in_bucketfs(
        self,
        bucket_file_path: str) -> Iterable[str]:
    """
    List the files found under the given path of this location.

    Abstract: concrete locations have to provide the implementation.

    :param bucket_file_path: Path whose files should be listed
    :return: The paths of the listed files as strings
    """
    pass
Lines changed: 51 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Any, Tuple, IO
22
from pathlib import PurePosixPath, Path
33
from urllib.parse import ParseResult
4-
from exasol_bucketfs_utils_python import download, upload
4+
from exasol_bucketfs_utils_python import download, upload, list_files
55
from exasol_bucketfs_utils_python import load_file_from_local_fs as from_BFS
66
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
77

@@ -24,90 +24,98 @@ def __init__(self, bucket_config: BucketConfig, base_path: PurePosixPath):
2424
self.base_path = base_path
2525
self.bucket_config = bucket_config
2626

27-
def get_complete_file_path_in_bucket(self,
28-
bucket_file_path: str) -> str:
27+
def get_complete_file_path_in_bucket(
28+
self,
29+
bucket_file_path: str) -> str:
2930
return str(PurePosixPath(self.base_path, bucket_file_path))
3031

31-
def download_from_bucketfs_to_string(self,
32-
bucket_file_path: str) -> str:
33-
result = download.download_from_bucketfs_to_string(
32+
def download_from_bucketfs_to_string(
33+
self,
34+
bucket_file_path: str) -> str:
35+
return download.download_from_bucketfs_to_string(
3436
self.bucket_config,
3537
self.get_complete_file_path_in_bucket(bucket_file_path)
3638
)
37-
return result
3839

39-
def download_object_from_bucketfs_via_joblib(self,
40-
bucket_file_path: str) -> Any:
41-
result = download.download_object_from_bucketfs_via_joblib(
40+
def download_object_from_bucketfs_via_joblib(
41+
self,
42+
bucket_file_path: str) -> Any:
43+
return download.download_object_from_bucketfs_via_joblib(
4244
self.bucket_config,
4345
self.get_complete_file_path_in_bucket(bucket_file_path)
4446
)
45-
return result
4647

47-
def upload_string_to_bucketfs(self,
48-
bucket_file_path: str,
49-
string: str) -> \
50-
Tuple[ParseResult, PurePosixPath]:
51-
result = upload.upload_string_to_bucketfs(
48+
def upload_string_to_bucketfs(
49+
self,
50+
bucket_file_path: str,
51+
string: str) -> Tuple[ParseResult, PurePosixPath]:
52+
return upload.upload_string_to_bucketfs(
5253
self.bucket_config,
5354
self.get_complete_file_path_in_bucket(bucket_file_path),
5455
string
5556
)
56-
return result
5757

58-
def upload_object_to_bucketfs_via_joblib(self, object: Any,
59-
bucket_file_path: str,
60-
**kwargs) -> \
61-
Tuple[ParseResult, PurePosixPath]:
62-
result = upload.upload_object_to_bucketfs_via_joblib(
58+
def upload_object_to_bucketfs_via_joblib(
59+
self, object: Any,
60+
bucket_file_path: str,
61+
**kwargs) -> Tuple[ParseResult, PurePosixPath]:
62+
return upload.upload_object_to_bucketfs_via_joblib(
6363
object,
6464
self.bucket_config,
6565
self.get_complete_file_path_in_bucket(bucket_file_path),
6666
**kwargs
6767
)
68-
return result
6968

70-
def upload_fileobj_to_bucketfs(self,
71-
fileobj: IO,
72-
bucket_file_path: str) -> \
73-
Tuple[ParseResult, PurePosixPath]:
74-
result = upload.upload_fileobj_to_bucketfs(
69+
def upload_fileobj_to_bucketfs(
70+
self,
71+
fileobj: IO,
72+
bucket_file_path: str) -> Tuple[ParseResult, PurePosixPath]:
73+
return upload.upload_fileobj_to_bucketfs(
7574
self.bucket_config,
7675
self.get_complete_file_path_in_bucket(bucket_file_path),
7776
fileobj
7877
)
79-
return result
8078

81-
def read_file_from_bucketfs_to_string(self,
82-
bucket_file_path: str) -> str:
83-
result = from_BFS.read_file_from_bucketfs_to_string(
79+
def read_file_from_bucketfs_to_string(
80+
self,
81+
bucket_file_path: str) -> str:
82+
return from_BFS.read_file_from_bucketfs_to_string(
8483
self.get_complete_file_path_in_bucket(bucket_file_path),
8584
self.bucket_config
8685
)
87-
return result
8886

89-
def read_file_from_bucketfs_to_file(self,
90-
bucket_file_path: str,
91-
local_file_path: Path) -> None:
87+
def read_file_from_bucketfs_to_file(
88+
self,
89+
bucket_file_path: str,
90+
local_file_path: Path) -> None:
9291
from_BFS.read_file_from_bucketfs_to_file(
9392
self.get_complete_file_path_in_bucket(bucket_file_path),
9493
self.bucket_config,
9594
local_file_path
9695
)
9796

98-
def read_file_from_bucketfs_to_fileobj(self,
99-
bucket_file_path: str,
100-
fileobj: IO) -> None:
97+
def read_file_from_bucketfs_to_fileobj(
98+
self,
99+
bucket_file_path: str,
100+
fileobj: IO) -> None:
101101
from_BFS.read_file_from_bucketfs_to_fileobj(
102102
self.get_complete_file_path_in_bucket(bucket_file_path),
103103
self.bucket_config,
104104
fileobj
105105
)
106106

107-
def read_file_from_bucketfs_via_joblib(self,
108-
bucket_file_path: str) -> Any:
109-
result = from_BFS.read_file_from_bucketfs_via_joblib(
107+
def read_file_from_bucketfs_via_joblib(
108+
self,
109+
bucket_file_path: str) -> Any:
110+
return from_BFS.read_file_from_bucketfs_via_joblib(
110111
self.get_complete_file_path_in_bucket(bucket_file_path),
111112
self.bucket_config
112113
)
113-
return result
114+
115+
def list_files_in_bucketfs(
        self,
        bucket_file_path: str) -> list:
    """
    List the files under the given path, resolved against the base path.

    The path is first combined with this location's base path and the
    listing itself is delegated to ``list_files.list_files_in_bucketfs``.

    :param bucket_file_path: Path to list, relative to the base path
    :return: Whatever ``list_files.list_files_in_bucketfs`` returns for
        the combined path
    """
    complete_path = self.get_complete_file_path_in_bucket(bucket_file_path)
    return list_files.list_files_in_bucketfs(self.bucket_config, complete_path)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from typing import Iterable
2+
import requests
3+
from pathlib import Path
4+
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
5+
from exasol_bucketfs_utils_python import bucketfs_utils
6+
from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url
7+
8+
9+
def list_files_in_bucketfs(bucket_config: BucketConfig,
                           bucket_file_path: str = "") -> Iterable[str]:
    """
    List the files stored below the specified path in a bucket in BucketFS.

    The listing of the bucket is requested once and then filtered locally:
    only entries whose leading path components equal those of
    ``bucket_file_path`` are kept, and that prefix is removed from them.

    :param bucket_config: BucketConfig for the bucket whose files are listed
    :param bucket_file_path: Path in the bucket below which files are listed.
        The default "" matches every entry of the listing.
    :return: A list with the paths of the matching files, relative to
        ``bucket_file_path`` and in the order of the listing. An entry that
        equals ``bucket_file_path`` itself is reported as ".". The list is
        empty if nothing matches.
    :raises ValueError: If ``bucket_file_path`` is None
    :raises requests.HTTPError: If the listing request returns an error status
    """
    # Paths inside a bucket are "/"-separated regardless of the client
    # platform, so they are parsed with PurePosixPath; the platform dependent
    # Path would produce backslash separated results on Windows.
    from pathlib import PurePosixPath

    if bucket_file_path is None:
        raise ValueError("bucket_file_path can't be None")
    # An empty path is passed on purpose: the listing is requested for the
    # bucket itself and filtered below.
    url = generate_bucket_http_url(bucket_config, "")
    auth = bucketfs_utils.create_auth_object(bucket_config)
    response = requests.get(url.geturl(), auth=auth)
    response.raise_for_status()

    prefix_parts = PurePosixPath(bucket_file_path).parts
    prefix_length = len(prefix_parts)
    files = []
    # Presumably the response holds one path per line (TODO confirm against
    # the BucketFS API). Splitting on line breaks instead of on arbitrary
    # whitespace keeps file names that contain spaces intact.
    for line in response.text.splitlines():
        if not line:
            continue
        path_parts = PurePosixPath(line).parts
        if path_parts[:prefix_length] == prefix_parts:
            # PurePosixPath() without arguments is ".", which is how an exact
            # match of bucket_file_path is reported.
            relevant_path = PurePosixPath(*path_parts[prefix_length:])
            files.append(str(relevant_path))

    return files

exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,9 @@ def read_file_from_bucketfs_via_joblib(self,
8181
result = joblib.load(
8282
self.get_complete_file_path_in_bucket(bucket_file_path))
8383
return result
84+
85+
def list_files_in_bucketfs(self,
                           bucket_file_path: str) -> list:
    """
    List the files stored below the given path of this local mock location.

    The path is resolved with ``get_complete_file_path_in_bucket`` and the
    local file system is searched recursively. Nothing is created or
    modified on disk.

    :param bucket_file_path: Path whose files should be listed
    :return: A sorted list of "/"-separated file paths relative to the
        resolved path, ["."] if the resolved path is itself a file, or an
        empty list if it does not exist
    """
    root = Path(self.get_complete_file_path_in_bucket(bucket_file_path))
    if root.is_file():
        # An exact match on a file is reported as "." relative to itself.
        return ["."]
    if not root.is_dir():
        return []
    # Sorting makes the result independent of the directory iteration
    # order of the underlying file system.
    return sorted(
        entry.relative_to(root).as_posix()
        for entry in root.rglob("*")
        if entry.is_file()
    )

tests/test_upload_list.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from exasol_bucketfs_utils_python import upload, list_files
2+
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
3+
from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig
4+
from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig
5+
from tests.test_load_fs_file_from_udf import delete_testfile_from_bucketfs
6+
7+
8+
def test_list_files():
    """
    Upload two files and check the listing for several paths in the bucket.

    Each listed path is queried with and without a trailing slash; the
    expected entries are relative to the queried path. The uploaded files
    are deleted again, whether or not the assertions succeed.
    """
    bucket_config = BucketConfig(
        bucket_name="default",
        bucketfs_config=BucketFSConfig(
            connection_config=BucketFSConnectionConfig(
                host="localhost", port=6666, user="w", pwd="write",
                is_https=False),
            bucketfs_name="bfsdefault"))

    uploaded_paths = ["path/in/bucket/file.txt", "path/file2.txt"]
    # Maps the path handed to the listing function to the expected result.
    expected_listings = {
        "path": ["in/bucket/file.txt", "file2.txt"],
        "path/": ["in/bucket/file.txt", "file2.txt"],
        "path/in": ["bucket/file.txt"],
        "path/in/": ["bucket/file.txt"],
        "path/in/bucket": ["file.txt"],
        "path/in/bucket/": ["file.txt"],
        "path/in/bucket/file.txt": ["."]
    }
    try:
        for uploaded_path in uploaded_paths:
            upload.upload_string_to_bucketfs(
                bucket_config=bucket_config,
                bucket_file_path=uploaded_path,
                string="test_string")

        for bucket_path, expected in expected_listings.items():
            listed = list_files.list_files_in_bucketfs(
                bucket_config, bucket_path)
            assert expected == listed
    finally:
        for uploaded_path in uploaded_paths:
            delete_testfile_from_bucketfs(
                file_path=uploaded_path,
                bucket_config=bucket_config)

0 commit comments

Comments
 (0)