fosanalysis
A framework to evaluate distributed fiber optic sensor data
Loading...
Searching...
No Matches
filereader.py
Go to the documentation of this file.
r"""
Contains class implementations to read ODiSI sensor data.
FileReader Base class of file reader implementation.
TsvReader Supports the reader for .tsv files.
DatReader Supports the reader for .dat files.

\author Anett Kielreiter
\date 2024
"""

# Standard library imports, grouped and sorted per PEP 8.
import copy
import json
import os
from abc import abstractmethod
from collections import OrderedDict
from collections.abc import Generator
from datetime import datetime
from pathlib import Path

# Third-party imports.
import numpy as np

# Local package imports.
from fosanalysis import utils
22
23
class SensorInfo():
    r"""
    Class to hold sensor specific data (name, serial number, ...).
    """
    def __init__(self,
            name: str,
            serial_number: str,
            part_number: str,
            channel: int,
            sensor_type: str,
            length: float,
            y_axis_unit: str,
            x_axis_unit: str,
            patch_cord: float,
            key_name: str,
            tare_name: str,
            gages: OrderedDict,
            segments: OrderedDict,
            x_axis: list = None,
            tare: list = None
            ):
        r"""
        Constructs a sensor info object with information.
        \param name Name of the sensor.
        \param serial_number Serial number of the sensor.
        \param part_number Part number of the sensor.
        \param channel Channel of the sensor.
        \param sensor_type Type of the sensor, e.g. temperature or strain.
        \param length Sensor length in meter.
        \param y_axis_unit Unit of the sensor values as string.
        \param x_axis_unit Unit of the x-axis values as string.
        \param patch_cord Patch cord length as float.
        \param key_name Name of the sensor key, defined by initialization.
        \param tare_name Current tare name of the sensor.
        \param gages Dictionary with all defined gage names and x-position.
        \param segments Dictionary with all defined segments.
        \param x_axis X-axis values of the measurement as array, optional.
        \param tare Tare values of the measurement as array, optional.
        """
        ## Name of the sensor.
        self.name = name
        ## Serial number of the sensor.
        self.serial_number = serial_number
        ## Part number of the sensor.
        self.part_number = part_number
        ## Channel of the sensor.
        self.channel = channel
        ## Type of the sensor, e.g. temperature or strain.
        self.sensor_type = sensor_type
        ## Sensor length in meter.
        self.length = length
        ## Unit of the sensor values as string.
        self.y_axis_unit = y_axis_unit
        ## Unit of the x-axis values as string.
        self.x_axis_unit = x_axis_unit
        ## Patch cord length as float.
        self.patch_cord = patch_cord
        ## Name of the sensor key, defined by initialization.
        self.key_name = key_name
        ## Current tare name of the sensor.
        self.tare_name = tare_name
        ## Dictionary with all defined gage names and x-position.
        self.gages = gages
        ## Dictionary with all defined segments.
        self.segments = segments
        ## X-axis values of the measurement, optional.
        self.x_axis = x_axis
        ## Tare values of the measurement, optional.
        self.tare = tare

    def __eq__(self, other) -> bool:
        r"""
        Overloads the comparison operator for equal.
        Two objects are equal, if both name and serial number match.
        """
        if not isinstance(other, SensorInfo):
            # Robustness fix: defer to Python's default comparison for
            # foreign types instead of raising AttributeError.
            return NotImplemented
        return (self.name == other.name and
                self.serial_number == other.serial_number)
86
87
class FileReader(utils.base.Base):
    # NOTE(review): the original class header was lost in extraction; the
    # base class utils.base.Base is taken from the generated member docs —
    # confirm against the repository.
    r"""
    Abstract class.
    It specifies the basic interface for reading a data file.
    """
    @abstractmethod
    def __init__(self, *args, **kwargs):
        r"""
        Constructs a FileReader object.
        Needs to be reimplemented by sub-classes.
        \param *args Additional positional arguments,
            will be passed to the superconstructor.
        \param **kwargs Additional keyword arguments,
            will be passed to the superconstructor.
        """
        super().__init__(*args, **kwargs)

    @abstractmethod
    def read_meta_infos(self, *args, **kwargs) -> tuple:
        r"""
        Abstract method to get meta and base data of the data file.
        It contains gages/segments, x-axis and tare
        of all available sensors.
        """
        raise NotImplementedError()

    @abstractmethod
    def read_next_measurement(self, *args, **kwargs) -> tuple:
        r"""
        Returns single measurement of the strain data.
        """
        raise NotImplementedError()

    @abstractmethod
    def read_dat_file(self, dat_file, *args, **kwargs) -> tuple:
        r"""
        Reads only single file and returns
        all measurement data separated by channels.
        \param dat_file Path of the .dat file to read from.
        """
        # Consistency fix: raise like the sibling abstract methods instead
        # of silently passing.
        raise NotImplementedError()
124
125
class TsvReader(FileReader):
    r"""
    File reader class for the `.tsv` measurement files exported by the
    ODiSI 6100 series interrogators by Luna Inc.
    See \cite LunaInnovations2020.
    Both - gage files (`*_gages.tsv`) and full (`*_full.tsv`) - are supported.
    """
    def __init__(self, filepath: str, *args, **kwargs):
        r"""
        Initializes a tsv file reader.
        \param filepath Path to the .tsv file to read all sensor information.
        \param *args Additional positional arguments,
            will be passed to the superconstructor.
        \param **kwargs Additional keyword arguments,
            will be passed to the superconstructor.
        """
        super().__init__(*args, **kwargs)
        ## Path to the .tsv file passed on initialization.
        self.filename = filepath
        ## Stores the item separator, for tsv files it is "\t".
        self.itemsep = "\t"
        ## Path to the companion file if both files (_full and _gages) are
        ## available, otherwise an empty string.
        self.other_tsv = self._locate_full_or_gage_file()
        ## Create a file pointer to the TSV file.
        # Prefer the "_full" file for reading measurements if available.
        # NOTE(review): the handle stays open for the object's lifetime and
        # is never closed explicitly.
        if 'full' in self.other_tsv:
            self.file = open(self.other_tsv)
        else:
            self.file = open(filepath)

    def read_meta_infos(self) -> tuple[list, dict]:
        r"""
        Method to get sensor data from file.
        For TSV only one sensor exists in the file.
        \return List with all sensor infos.
        \return Dictionary with measurement specific data.
        """
        self.file.seek(0)  # reset the file pointer
        metadata = self._read_metadata()
        x_axis, tare, segments, gages = self._read_base_data()
        sensor = SensorInfo(metadata.get('Sensor Name', ''),
                            metadata.get('Sensor Serial Number', ''),
                            metadata.get('Sensor Part Number', ''),
                            int(metadata.get('Channel', 0)),
                            metadata.get('Sensor Type', ''),
                            float(metadata.get('Length (m)', 0.0)),
                            metadata.get('Units', ''),
                            metadata.get('X-Axis Units', ''),
                            float(metadata.get('Patch Cord Length (m)', 0)),
                            metadata.get('Key Name', ''),
                            metadata.get('Tare Name', ''),
                            gages,
                            segments,
                            x_axis,
                            tare)
        return [sensor], metadata

    def _read_metadata(self) -> dict:
        r"""
        Retrieves the meta data of the sensor file.
        Reads the header data like sensor name, length, gage_pitch, ...
        \return Metadata as dictionary.
        """
        metadata = {}
        for line in self.file:
            if "------" in line:
                # The dashed line terminates the metadata header.
                break
            line_list = line.strip().split(self.itemsep)
            # Field name carries a trailing ':' which is stripped here.
            # NOTE(review): assumes every header line ends its first cell
            # with ':' — a line without it loses its last character.
            fieldname = line_list[0][:-1]
            metadata[fieldname] = line_list[1] if len(line_list) > 1 else ""
        return metadata

    def _read_base_data(
            self
            ) -> tuple[list, list, OrderedDict, OrderedDict]:
        r"""
        Get the base data for all measurements.
        It reads the segments, gages, x-axis and tare of the file.
        Search for '_gages' and '_full' files.
        In case of '_gages' file separate gages and segments.
        In case of only existing '_full' file no gages will returned.
        \return X-axis gages of current measurement.
        \return Tare values of measurement.
        \return All segments with index and length available in the
            file as OrderedDict, empty if no segments available.
        \return All gages with its index in the file as OrderedDict,
            empty if no gages available.
        """
        gages = OrderedDict()
        segments = OrderedDict()
        if self.other_tsv != '':
            # Both files (full + gages) are in the folder: x-axis and tare
            # come from self.file, names and positions from the gage file.
            x_axis, tare = self._read_base_from_file(segments, gages)
            gage_path = (self.other_tsv if '_gage' in self.other_tsv
                         else self.filename)
            # Bug fix: close the gage file again (the handle previously
            # leaked).
            with open(gage_path) as gage_file:
                for line in gage_file:
                    line_list = line.strip().split(self.itemsep)
                    if len(line_list) > 3:
                        if line_list[0].lower() == 'gage/segment name':
                            self._read_gage_segments_info(line_list[3:],
                                                          segments,
                                                          gages)
                        if line_list[0].lower() == 'x-axis':
                            self._set_pos_for_segments_gages(line_list[3:],
                                                             segments,
                                                             gages)
        else:
            x_axis, tare = self._read_base_from_file(segments, gages)
        return x_axis, tare, segments, gages

    def read_next_measurement(self,
            start_time: datetime = None,
            end_time: datetime = None,
            *args, **kwargs) -> Generator:
        r"""
        Returns only single entry of measurement as generator
        to request for multiple times for parts or the whole file.
        \param start_time Read measurements from given timestamp.
        \param end_time Read measurements till given timestamp.
        \return Datetime object of current line.
        \return Strain values of the current line as array.
        """
        for line in self.file:
            line_list = line.strip().split(self.itemsep)
            if len(line_list) > 3 and line_list[1].lower() == 'measurement':
                timestamp = datetime.fromisoformat(line_list[0])
                if end_time is not None and end_time < timestamp:
                    # Past the requested window: stop reading.
                    break
                if start_time is None or start_time <= timestamp:
                    yield timestamp, np.array(line_list[3:], dtype=float)

    def read_dat_file(self, dat_file):
        r"""
        Not supported for tsv files.
        \param dat_file Unused.
        \raises NotImplementedError Always.
        """
        raise NotImplementedError("Can't use 'read_dat_file' on TsvReader")

    def _read_base_from_file(self,
            segments: OrderedDict,
            gages: OrderedDict) -> tuple[list, list]:
        r"""
        Read gage/segment, x-axis and tare line to discover related data.
        The segments are written into given segments dictionary.
        The gages are written into given gages dictionary.
        This information is used later on to split the data.
        \param segments Dictionary with named segments is written.
        \param gages Dictionary, with named gages is written.
        \return Array with gages of x-axis existing in file.
        \return Array with whole tare of file, empty if not exist.
        """
        self.file.seek(0)  # reset the file pointer
        x_axis = []
        tare = []
        for line in self.file:
            line_list = line.strip().split(self.itemsep)
            if len(line_list) > 3:
                record = line_list[0].lower()
                if record == 'gage/segment name':
                    self._read_gage_segments_info(line_list[3:],
                                                  segments,
                                                  gages)
                elif record == 'x-axis':
                    x_axis = list(map(float, line_list[3:]))
                    if '_gage' in Path(self.filename).name:
                        self._set_pos_for_segments_gages(x_axis,
                                                         segments,
                                                         gages)
                    break
                elif record == 'tare':
                    tare = list(map(float, line_list[3:]))
        return x_axis, tare

    def _read_gage_segments_info(self,
            data: list,
            segments: OrderedDict,
            gages: OrderedDict):
        r"""
        Read gage and segment line to discover the gages and segments.
        The gages are written into given gages dictionary.
        The segments are written into given segments dictionary.
        This information is used later on to split the data.
        \param data List of line elements, contain the gage and segment names.
        \param segments Dictionary with named segments is written.
        \param gages Dictionary, with named gages is written.
        """
        segment_name = None
        for index, value in enumerate(data):
            if "[0]" in value:
                # A "<name>[0]" entry starts a new segment; finalize the
                # length of the previous one first.
                if segment_name is not None:
                    segments[segment_name]['length'] = (
                        index - segments[segment_name]['index'])
                segment_name = value.split("[")[0]
                segments[segment_name] = {'index': index}
            elif segment_name is None:
                # Entries before the first segment are individual gages.
                gages[value] = {'index': index}
        if segment_name is not None:
            # Close the trailing segment with the remaining column count.
            segments[segment_name]['length'] = (
                len(data) - segments[segment_name]['index'])

    def _set_pos_for_segments_gages(self,
            data: list,
            segments: OrderedDict,
            gages: OrderedDict):
        r"""
        Private method to determine the index of x-positions
        for segments and gages based on x_axis of full file.
        This method will only be called if both files available.
        This information is used later to split the data.
        \param data List of line elements, contain the x-axis data.
        \param segments Dictionary with named segments.
        \param gages Dictionary, with named gages.
        """
        for gage in gages:
            gages[gage]['x_pos'] = data[gages[gage]['index']]
        for part in segments:
            index = segments[part]['index']
            x_pos_start = data[index]
            # NOTE(review): for the last segment, index + length may equal
            # len(data); confirm the x-axis line always has enough columns.
            x_pos_end = data[index + segments[part]['length']]
            # Bug fix: compare against the stored x-position of each gage.
            # Previously the whole gage info dict was compared to a scalar,
            # which could never match, so start/end gages were never set.
            start_gage = next((name for name, info in gages.items()
                               if info.get('x_pos') == x_pos_start), None)
            end_gage = next((name for name, info in gages.items()
                             if info.get('x_pos') == x_pos_end), None)
            if start_gage is not None and end_gage is not None:
                segments[part]['start_gage'] = start_gage
                segments[part]['end_gage'] = end_gage

    def _locate_full_or_gage_file(self) -> str:
        r"""
        Private method to look for both files (_full and _gage) in folder.
        If filename is 'gages.tsv' file search for 'full.tsv' and vice versa.
        \return Path to the found .tsv file, otherwise empty string.
        """
        other_tsv = ''
        ref_path = Path(self.filename)
        # Robustness fix: match the suffix of the file name only, so a
        # directory named e.g. "_full.tsv" cannot trigger a false match.
        if ref_path.name.endswith('_gages.tsv'):
            # Drop "gages.tsv" (9 chars), keep the underscore.
            new_name = ref_path.name[:-9] + "full.tsv"
        elif ref_path.name.endswith('_full.tsv'):
            # Drop "full.tsv" (8 chars), keep the underscore.
            new_name = ref_path.name[:-8] + "gages.tsv"
        else:
            return other_tsv
        path_2 = os.path.join(ref_path.parent, new_name)
        if os.path.exists(path_2):
            other_tsv = path_2
        return other_tsv
385
386
class DatReader(FileReader):
    r"""
    File reader class for the `.dat` measurement files recorded by the
    ODiSI 6100 series interrogators by Luna Inc.
    See \cite LunaInnovations2020.
    The file contains the raw data from measurement,
    mostly from different channels (1-8 possible).
    """
    ## Fallback file version, used until the header states one.
    DEFAULT_FILE_VERSION = 9

    def __init__(self, filepath: str, *args, **kwargs):
        r"""
        Initializes a .dat file reader.
        \param filepath Path to file object to read in sensor data.
        \param *args Additional positional arguments,
            will be passed to the superconstructor.
        \param **kwargs Additional keyword arguments,
            will be passed to the superconstructor.
        """
        super().__init__(*args, **kwargs)
        ## Path to the file passed on initialization.
        self.filename = filepath
        ## Stores the item separator, for dat files the "," is used.
        self.itemsep = ","
        ## Stores the file version of .dat file.
        self.file_version = self.DEFAULT_FILE_VERSION
        ## List of files where the measurement data is stored.
        self.measurement_files = []
        ## Create a file pointer to the raw data .hdr file.
        # NOTE(review): the handle stays open for the object's lifetime and
        # is never closed explicitly.
        self.file = open(filepath)

    def read_meta_infos(self) -> tuple[list, dict]:
        r"""
        Method to get sensor data from file.
        Returns all sensor data available in the .dat file.
        \return List of available sensors.
        \return Dictionary with measurement specific data.
        """
        self.file.seek(0)  # reset the file pointer
        sensor_list = []
        metadata = {}
        for line in self.file:
            line_dict = json.loads(line)
            if 'sensors' in line_dict:
                metadata, sensor_list = self._read_metadata(line_dict)
                self.file_version = metadata.get('file version',
                                                 self.DEFAULT_FILE_VERSION)
            elif 'data' in line_dict:
                self._read_base_data(line_dict, sensor_list)
            elif 'filename' in line_dict:
                # Measurement files are referenced relative to the .hdr file.
                self.measurement_files.append(
                    os.path.join(os.path.dirname(self.file.name),
                                 line_dict['filename']))
        return sensor_list, metadata

    def _read_metadata(self, meta_dict: dict) -> dict:
        r"""
        Retrieves the meta data of the sensor file with multi channels.
        Reads the header data like sensor name, length, gage_pitch, ...
        \param meta_dict Dictionary with meta infos of first line.
        \return Metadata as dictionary without sensor info.
        \return List of SensorInfo objects for each channel.
        """
        sensor_list = []
        for sensor in meta_dict.get('sensors', []):
            # Deep copy so the entries of meta_dict stay untouched.
            sensor_dict = copy.deepcopy(sensor)
            segments, gages = self._extract_segments_gages(sensor_dict)
            sensor_info = SensorInfo(sensor_dict.get('sensor name', ''),
                                     sensor_dict.get('sensor serial number', ''),
                                     sensor_dict.get('sensor part number', ''),
                                     sensor_dict.get('channel', 0),
                                     sensor_dict.get('sensor type (disp)', ''),
                                     sensor_dict.get('length (m)', 0.0),
                                     sensor_dict.get('units', ''),
                                     sensor_dict.get('x-axis units', ''),
                                     sensor_dict.get('patch cord (m)', 0.0),
                                     sensor_dict.get('key name', ''),
                                     sensor_dict.get('tare name', ''),
                                     gages,
                                     segments)
            sensor_list.append(sensor_info)
        # The sensors are returned separately, so drop them from metadata.
        meta_dict.pop('sensors')
        return meta_dict, sensor_list

    def _read_base_data(self, line_dict: dict, sensor_list: list):
        r"""
        Reads one line of base data file, parse for channel and record type.
        The method reads the x-axis and tare if available
        and adds it to the corresponding SensorInfo object of given list.
        \param line_dict Current line of hdr.dat with x-axis or tare data.
        \param sensor_list List of SensorInfo objects created by first line.
        """
        curr_ch = line_dict.get('channel', None)
        ch_sensor = next((sensor for sensor in sensor_list
                          if sensor.channel == curr_ch), None)
        if ch_sensor is None:
            # Unknown channel: nothing to attach the data to.
            return
        if line_dict['record type'] == 'x-axis':
            ch_sensor.x_axis = line_dict['data']
        elif line_dict['record type'] == 'tare':
            ch_sensor.tare = line_dict['data']

    def read_next_measurement(self,
            channel: int,
            start_time: datetime = None,
            end_time: datetime = None,
            *args, **kwargs) -> Generator:
        r"""
        Returns only single entry of a measurement file as generator
        to request for multiple times for parts or the whole file.
        Checks the given channel first.
        \param channel Channel of the selected sensor.
        \param start_time Read measurements from given timestamp.
        \param end_time Read measurements till given timestamp.
        \return Datetime object of current line.
        \return Strain values of the current line as array.
        """
        if channel is None:
            # No channel selected: yield nothing.
            return
        for data_file in self.measurement_files:
            with open(data_file) as f:
                for line in f:
                    line_dict = json.loads(line)
                    if (line_dict.get('channel', None) == channel
                            and line_dict.get('record type', '') == 'measurement'
                            and 'data' in line_dict):
                        time = datetime.fromisoformat(line_dict['timestamp'])
                        if end_time is not None and end_time < time:
                            # Past the requested window: stop this file.
                            break
                        if start_time is None or start_time <= time:
                            yield time, self._parse_data(line_dict['data'])

    def read_dat_file(self, dat_file):
        r"""
        Reads single .dat file and parses timestamps and data for each channel
        Returns a dictionary with channel as key.
        \param dat_file Path of the .dat file to read from.
        \return data_dict Dictionary with timestamps and data
            for each channel in file.
        """
        data_dict = {}
        with open(dat_file) as f:
            for line in f:
                line_dict = json.loads(line)
                channel = line_dict.get('channel', None)
                if (line_dict.get('record type', '') == 'measurement'
                        and 'data' in line_dict):
                    timestamp = datetime.fromisoformat(line_dict['timestamp'])
                    data = self._parse_data(line_dict['data'])
                    # setdefault creates the per-channel entry on first use.
                    entry = data_dict.setdefault(
                        channel, {'timestamps': [], 'datas': []})
                    entry['timestamps'].append(timestamp)
                    entry['datas'].append(data)
        return data_dict

    def _extract_segments_gages(
            self,
            sensor_dict: dict
            ) -> tuple[OrderedDict, OrderedDict]:
        r"""
        Private method to extract the gages and segments
        from metadata dictionary.
        The gages/segments are stored in the first line of the .dat files.
        \param sensor_dict Dictionary with gages and segments.
        \return Segments of the sensor as ordered dict.
        \return Gages of the sensor as ordered dict.
        """
        gages = OrderedDict()
        segments = OrderedDict()
        # Read gages; locations are given in mm and converted to m here.
        for gage in sensor_dict.get('gages', []):
            gages[gage['gage name']] = {'index': gage['index'],
                                        'x_pos': gage['location (mm)']/1000}
        # Read segments
        for segment in sensor_dict.get('segments', []):
            name = segment['segment name']
            segments[name] = {'index': segment['index'],
                              'length': segment['size'],
                              'start_gage': segment['start gage'],
                              'end_gage': segment['end gage'],}
        return segments, gages

    def _parse_data(self, strain_data) -> np.array:
        r"""
        Private method to parse the measurement data
        dependent on type and file version.
        File version = 7: Data provided as bytecode, type is string.
        File version = 9: Data provided as list with a divisor of 10.

        \param strain_data Strain data provided by raw data file,
            maybe string or list.
        \return Array with converted strain values according to file version.
        """
        if isinstance(strain_data, list):
            nan_list = self._replace_none(strain_data)
            values = np.asarray(nan_list, dtype=float)
            # Version >= 9 stores values scaled by 10.
            return values * 0.1 if self.file_version >= 9 else values
        if self.file_version == 7 and isinstance(strain_data, str):
            int_array = np.frombuffer(bytearray.fromhex(strain_data),
                                      np.uint8)
            strain_list = []
            # Each value is encoded in a (factor, value) byte pair.
            # Robustness fix: stop before a dangling trailing byte instead
            # of raising IndexError on odd-length input.
            for index in range(0, len(int_array) - 1, 2):
                factor = int_array[index]
                value = int_array[index + 1]
                if factor == 128 and value == 0:
                    # Sentinel for a missing reading.
                    strain = np.nan
                elif factor > 128:
                    # Negative value in two's-complement-like encoding.
                    strain = value - 256 * (256 - factor)
                else:
                    strain = factor * 256 + value
                strain_list.append(strain)
            return np.asarray(strain_list, dtype=float)
        return np.asarray(strain_data)

    def _replace_none(self, values: list) -> list:
        r"""
        Private method to replace all 'None' values as NaN values.
        \param values List with strain data.
        \return List with np.nan values instead of 'None'.
        """
        return [np.nan if x is None else x for x in values]
File reader class for the .dat measurement files recorded by the ODiSI 6100 series interrogators by Luna Inc.
filename
File object which is already opened.
list measurement_files
List of files where the measurement data is stored.
str itemsep
Stores the item separator, for dat files the "," is used.
np.array _parse_data(self, strain_data)
tuple[list, dict] read_meta_infos(self)
Method to get sensor data from file.
Generator[datetime, np.array] read_next_measurement(self, int channel, datetime start_time=None, datetime end_time=None, *args, **kwargs)
Returns only single entry of a measurement file as generator to request for multiple times for parts ...
file
Create a file pointer to the raw data .hdr file.
_read_base_data(self, dict line_dict, list sensor_list)
Reads one line of base data file, parse for channel and record type.
int file_version
Stores the file version of .dat file.
dict _read_metadata(self, dict meta_dict)
Retrieves the meta data of the sensor file with multi channels.
tuple[OrderedDict, OrderedDict] _extract_segments_gages(self, dict sensor_dict)
Private method to extract the gages and segments from metadata dictionary.
__init__(self, str filepath, *args, **kwargs)
Initializes a .dat file reader.
read_dat_file(self, dat_file)
Reads single .dat file and parses timestamps and data for each channel Returns a dictionary with chan...
list _replace_none(self, list values)
Private method to replace all 'None' values as NaN values.
tuple read_next_measurement(self, *args, **kwargs)
Returns single measurement of the strain data.
tuple read_dat_file(self, dat_file, *args, **kwargs)
Reads only single file and returns all measurement data separated by channels.
tuple read_meta_infos(self, *args, **kwargs)
Abstract method to get meta and base data of the data file.
__init__(self, *args, **kwargs)
Constructs a FileReader object.
Definition filereader.py:94
Class to hold sensor specific data (name, serial number, ...).
Definition filereader.py:24
__init__(self, str name, str serial_number, str part_number, int channel, str sensor_type, float length, str y_axis_unit, str x_axis_unit, float patch_cord, str key_name, str tare_name, OrderedDict gages, OrderedDict segments, list x_axis=None, list tare=None)
Constructs a sensor info object with information.
Definition filereader.py:45
bool __eq__(self, other)
Overloads the comparison operator for equal.
Definition filereader.py:80
tuple[list, list, OrderedDict, OrderedDict] _read_base_data(self)
Get the base data for all measurements.
file
Create a file pointer to the TSV file.
dict _read_metadata(self)
Retrieves the meta data of the sensor file.
_set_pos_for_segments_gages(self, list data, OrderedDict segments, OrderedDict gages)
Private method to determine the index of x-positions for segments and gages based on x_axis of full f...
Generator[datetime, np.array] read_next_measurement(self, datetime start_time=None, datetime end_time=None, *args, **kwargs)
Returns only single entry of measurement as generator to request for multiple times for parts or the ...
str other_tsv
Path to the companion file if both files (_full and _gages) are available, otherwise an empty string.
tuple[list, list] _read_base_from_file(self, OrderedDict segments, OrderedDict gages)
Read gage/segment, x-axis and tare line to discover related data.
tuple[list, dict] read_meta_infos(self)
Method to get sensor data from file.
str _locate_full_or_gage_file(self)
Private method to look for both files (_full and _gage) in folder.
__init__(self, str filepath, *args, **kwargs)
Initializes a tsv file reader.
_read_gage_segments_info(self, list data, OrderedDict segments, OrderedDict gages)
Read gage and segment line to discover the gages and segments.
str itemsep
Stores the item separator, for tsv files it is "\t".
read_dat_file(self, dat_file)
Reads only single file and returns all measurement data separated by channels.
Abstract base class, which deals with superfluous constructor arguments.
Definition base.py:11