Skip to content

Commit 8bdf418

Browse files
authored
token based login for flash scicat (#347)
* token based login for flash scicat
* add test file for metadata
* add requests-mock pytest fixture
* fix the merge conflict
* fix version problems
* option for user to add token from main interface
* fix the lint error
* fix the kwds
1 parent 23da8fa commit 8bdf418

File tree

5 files changed

+132
-52
lines changed

5 files changed

+132
-52
lines changed

poetry.lock

Lines changed: 48 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ mypy = ">=1.6.0"
5757
types-pyyaml = ">=6.0.12.12"
5858
types-requests = ">=2.31.0.9"
5959
pyfakefs = ">=5.3.0"
60+
requests-mock = "^1.11.0"
61+
6062

6163
[tool.poetry.group.docs]
6264
optional = true

sed/loader/flash/loader.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,7 @@ def parquet_handler(
750750
load_parquet: bool = False,
751751
save_parquet: bool = False,
752752
force_recreate: bool = False,
753+
**kwds,
753754
) -> Tuple[dd.DataFrame, dd.DataFrame]:
754755
"""
755756
Handles loading and saving of parquet files based on the provided parameters.
@@ -835,13 +836,14 @@ def parquet_handler(
835836

836837
return dataframe_electron, dataframe_pulse
837838

838-
def parse_metadata(self) -> dict:
839+
def parse_metadata(self, scicat_token: str = None, **kwds) -> dict:
839840
"""Uses the MetadataRetriever class to fetch metadata from scicat for each run.
840841
841842
Returns:
842843
dict: Metadata dictionary
844+
scicat_token (str, optional): The scicat token to use for fetching metadata
843845
"""
844-
metadata_retriever = MetadataRetriever(self._config["metadata"])
846+
metadata_retriever = MetadataRetriever(self._config["metadata"], scicat_token)
845847
metadata = metadata_retriever.get_metadata(
846848
beamtime_id=self._config["core"]["beamtime_id"],
847849
runs=self.runs,
@@ -924,7 +926,7 @@ def read_dataframe(
924926

925927
df, df_timed = self.parquet_handler(data_parquet_dir, **kwds)
926928

927-
metadata = self.parse_metadata() if collect_metadata else {}
929+
metadata = self.parse_metadata(**kwds) if collect_metadata else {}
928930
print(f"loading complete in {time.time() - t0: .2f} s")
929931

930932
return df, df_timed, metadata

sed/loader/flash/metadata.py

Lines changed: 21 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,23 @@ class MetadataRetriever:
1515
on beamtime and run IDs.
1616
"""
1717

18-
def __init__(self, metadata_config: Dict) -> None:
18+
def __init__(self, metadata_config: Dict, scicat_token: str = None) -> None:
1919
"""
2020
Initializes the MetadataRetriever class.
2121
2222
Args:
2323
metadata_config (dict): Takes a dict containing
24-
at least url, username and password
24+
at least url, and optionally token for the scicat instance.
25+
scicat_token (str, optional): The token to use for fetching metadata.
2526
"""
26-
self.url = metadata_config["scicat_url"]
27-
self.username = metadata_config["scicat_username"]
28-
self.password = metadata_config["scicat_password"]
27+
self.token = metadata_config.get("scicat_token", None)
28+
if scicat_token:
29+
self.token = scicat_token
30+
self.url = metadata_config.get("scicat_url", None)
31+
32+
if not self.token or not self.url:
33+
raise ValueError("No URL or token provided for fetching metadata from scicat.")
34+
2935
self.headers = {
3036
"Content-Type": "application/json",
3137
"Accept": "application/json",
@@ -80,9 +86,16 @@ def _get_metadata_per_run(self, pid: str) -> Dict:
8086
Raises:
8187
Exception: If the request to retrieve metadata fails.
8288
"""
89+
headers2 = dict(self.headers)
90+
headers2["Authorization"] = f"Bearer {self.token}"
8391
try:
8492
# Create the dataset URL using the PID
85-
dataset_response = requests.get(self._create_dataset_url_by_PID(pid), timeout=10)
93+
dataset_response = requests.get(
94+
self._create_dataset_url_by_PID(pid),
95+
params={"access_token": self.token},
96+
headers=headers2,
97+
timeout=10,
98+
)
8699
dataset_response.raise_for_status() # Raise HTTPError if request fails
87100
# If the dataset request is successful, return the retrieved metadata
88101
# as a JSON object
@@ -105,37 +118,9 @@ def _create_dataset_url_by_PID(self, pid: str) -> str: # pylint: disable=invali
105118
Raises:
106119
Exception: If the token request fails.
107120
"""
108-
npid = ("/" + pid).replace(
121+
npid = pid.replace(
109122
"/",
110123
"%2F",
111124
) # Replace slashes in the PID with URL-encoded slashes
112-
url = f"{self.url}/RawDatasets/{npid}?access_token={self._get_token()}"
125+
url = f"{self.url}/Datasets/{npid}"
113126
return url
114-
115-
def _get_token(self) -> str:
116-
"""
117-
Retrieves the access token for authentication.
118-
119-
Returns:
120-
str: The access token.
121-
122-
Raises:
123-
Exception: If the token request fails.
124-
"""
125-
try:
126-
token_url = f"{self.url}/Users/login"
127-
# Send a POST request to the token URL with the username and password
128-
token_response = requests.post(
129-
token_url,
130-
headers=self.headers,
131-
json={"username": self.username, "password": self.password},
132-
timeout=10,
133-
)
134-
token_response.raise_for_status()
135-
# If the token request is successful, return the access token from the response
136-
return token_response.json()["id"]
137-
138-
# Otherwise issue warning
139-
except requests.exceptions.RequestException as exception:
140-
warnings.warn(f"Failed to retrieve authentication token: {str(exception)}")
141-
return "" # Return an empty string if token retrieval fails

0 commit comments

Comments
 (0)