Skip to content

Model

SourceRecord dataclass

A class representing a record from a source.

Source code in dataimporter/lib/model.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
@dataclass
class SourceRecord:
    """
    A class representing a record from a source.

    Each record carries an ID, its data, and the name of the source the data came
    from. A record whose data is empty (falsy) is treated as a deletion.
    """

    id: str
    data: Data
    # the name of the source of this record data
    source: str

    @property
    def is_deleted(self) -> bool:
        """
        Returns True if the record's data represents a deletion, False if not. A
        record represents a deletion when its data is empty (falsy).

        :return: True if the record's data represents a deletion, False if not.
        """
        return not self.data

    def __bool__(self) -> bool:
        """
        Returns True if the record has data, False if not. This is the opposite of the
        is_deleted property.

        :return: True if the record has data, False if not.
        """
        return bool(self.data)

    def __contains__(self, field: str) -> bool:
        """
        Checks if the given field is present in this record's data.

        :param field: the field name
        :return: True if the field exists, False if not
        """
        return field in self.data

    def __eq__(self, other: Any) -> bool:
        """
        Compares any object to this record to determine if they are the same. If the
        other object passed is a SourceRecord type object then the id and data fields
        will be compared and if they are both the same as this record then True is
        returned, otherwise False.

        If the other is not a SourceRecord, NotImplemented is returned.

        The source is completely ignored as it isn't important in comparisons (we don't
        care where something came from, it's the ID and data that matter).

        :param other: the other object
        :return: True if they're the same, False if not, or NotImplemented if the other
            object isn't a SourceRecord
        """
        if not isinstance(other, SourceRecord):
            return NotImplemented
        return self.id == other.id and self.data == other.data

    def get_embargo(self) -> Optional[int]:
        """
        Get the embargo date from this record if one exists. This will only return a
        value for EMu records.

        The NhmSecEmbargoDate and NhmSecEmbargoExtensionDate fields are checked and
        any value that can't be parsed as a %Y-%m-%d date is ignored.

        :return: the embargo date furthest in the future, or None if none is found
        """
        embargo = None
        for value in self.iter_all_values(
            'NhmSecEmbargoDate', 'NhmSecEmbargoExtensionDate'
        ):
            try:
                date = parse_to_timestamp(value, '%Y-%m-%d')
            except ValueError:
                # unparseable dates are deliberately skipped (best effort)
                continue
            if embargo is None or date > embargo:
                embargo = date

        return embargo

    def iter_all_values(self, *fields: str, clean: bool = True) -> Iterable[str]:
        """
        Retrieves all the values from the given fields and yields them one by one. If
        there are no values available, nothing is yielded. If there are values, each one
        is yielded on its own. This means each value yielded will be a str; tuples are
        yielded element by element. For example, with the data dict:

            {
              "x": ("a","b","c"),
              "y": "d",
              "z": ("e", "f")
            }

        calling iter_all_values("x", "y", "z") yields "a", "b", "c", "d", "e", and "f".

        If the clean parameter is True (the default), then empty strings are removed. If
        it is False then they are yielded. Fields which are missing, or whose value is
        None, are always skipped.

        :param fields: the fields to extract values from
        :param clean: whether to remove empty strings (default: True)
        :return: yields str values in the provided field order
        """
        for field in fields:
            value = self.data.get(field)
            if value is None:
                continue
            # treat a lone value as a 1-tuple so both shapes share one code path
            elements = value if isinstance(value, tuple) else (value,)
            for element in elements:
                if element or not clean:
                    yield element

    def get_all_values(
        self,
        *fields: str,
        clean: bool = True,
        reduce: bool = True,
    ) -> Union[None, str, Tuple[str, ...]]:
        """
        Retrieves all the values from the given fields and returns them. If there are no
        values available, None is returned. If there is only one value (even if it's a
        1-tuple) and reduce is True then the value is returned on its own. Otherwise, a
        tuple of all the values is returned. This means that with the data dict:

            {
              "x": ("a","b","c"),
              "y": "d",
              "z": ("e", "f")
            }

        calling get_all_values("x", "y", "z") returns ("a", "b", "c", "d", "e", "f").

        :param fields: the fields to extract the values from
        :param clean: whether to remove empty strings (default: True)
        :param reduce: whether to turn a 1 length tuple into a string or leave it as a
                       1 length tuple. Defaults to True which will do this conversion.
        :return: None if no values, a str if there's only one value and reduce is True,
                 otherwise, a tuple of str containing all the values, in provided field
                 order.
        """
        values = tuple(self.iter_all_values(*fields, clean=clean))
        if not values:
            return None
        if reduce and len(values) == 1:
            return values[0]
        return values

    def get_first_value(
        self,
        *fields: str,
        clean: bool = True,
        default: Any = None,
        lower: bool = False,
    ) -> Optional[str]:
        """
        Retrieves the first value present in the given fields. If the first field with a
        value contains a tuple rather than a str, then the first element of the tuple is
        returned, not the tuple itself. The fields are iterated over in the order they
        are provided until a value is found.

        :param fields: the fields to extract a value from
        :param clean: whether to remove empty strings (default: True)
        :param default: the value to return if no values are found for the fields given
            (default: None)
        :param lower: whether to lowercase the value or not before returning it. This
            also applies to a str default; non-str values are returned untouched.
        :return: the first value from the given fields, or the default
        """
        value = next(iter(self.iter_all_values(*fields, clean=clean)), default)
        # only str values can be lowercased; default is typed Any so it may be
        # something else entirely (e.g. a number or sentinel object)
        if lower and isinstance(value, str):
            return value.lower()
        return value

is_deleted property

Returns True if the record's data represents a deletion, False if not.

Returns:

Type Description
bool

True if the record's data represents a deletion, False if not.

__bool__()

Returns True if the record has data, False if not. This is the opposite of the is_deleted property.

Returns:

Type Description
bool

True if the record has data, False if not.

Source code in dataimporter/lib/model.py
29
30
31
32
33
34
35
36
def __bool__(self) -> bool:
    """
    Truthiness of the record: True when the record holds data, False when its data
    is empty. This is the inverse of the is_deleted property.

    :return: True if the record has data, False if not.
    """
    has_data = bool(self.data)
    return has_data

__contains__(field)

Checks if the given field is present in this record's data.

Parameters:

Name Type Description Default
field str

the field name

required

Returns:

Type Description
bool

True if the field exists, False if not

Source code in dataimporter/lib/model.py
38
39
40
41
42
43
44
45
def __contains__(self, field: str) -> bool:
    """
    Membership test for field names, delegating to this record's data.

    :param field: the field name to look for
    :return: True if the field is in the record's data, False if not
    """
    is_present = field in self.data
    return is_present

__eq__(other)

Compares any object to this record to determine if they are the same. If the other object passed is a SourceRecord type object then the id and data fields will be compared and if they are both the same as this record then True is returned, otherwise False.

If the other is not a SourceRecord, NotImplemented is returned.

The source is completely ignored as it isn't important in comparisons (we don't care where something came from, it's the ID and data that matter).

Parameters:

Name Type Description Default
other Any

the other object

required

Returns:

Type Description
bool

True if they're the same, False if not, or NotImplemented if the other object isn't a SourceRecord

Source code in dataimporter/lib/model.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def __eq__(self, other: Any) -> bool:
    """
    Equality check against any other object. Two SourceRecord objects are equal when
    both their id and their data match.

    The source is deliberately left out of the comparison: where a record came from
    doesn't matter, only its ID and data do.

    For anything that isn't a SourceRecord, NotImplemented is returned.

    :param other: the object to compare against
    :return: True if the id and data match, False if not, or NotImplemented if the
        other object isn't a SourceRecord
    """
    # guard: defer to the other operand for foreign types
    if not isinstance(other, SourceRecord):
        return NotImplemented
    return self.id == other.id and self.data == other.data

get_all_values(*fields, clean=True, reduce=True)

Retrieves all the values from the given fields and returns them. If there are no values available, None is returned. If there is only one value (even if it's a 1-tuple) and reduce is True, then the value is returned on its own. Otherwise, a tuple of all the values is returned. This means that with the data dict:

{
  "x": ("a","b","c"),
  "y": "d",
  "z": ("e", "f")
}

calling get_all_values("x", "y", "z") returns ("a", "b", "c", "d", "e", "f").

Parameters:

Name Type Description Default
fields str

the fields to extract the values from

()
clean bool

whether to remove empty strings (default: True)

True
reduce bool

whether to turn a 1 length tuple into a string or leave it as a 1 length tuple. Defaults to True which will do this conversion.

True

Returns:

Type Description
Union[None, str, Tuple[str, ...]]

None if no values, a str if there's only one value, otherwise, a tuple of str containing all the values, in provided field order.

Source code in dataimporter/lib/model.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def get_all_values(
    self,
    *fields: str,
    clean: bool = True,
    reduce: bool = True,
) -> Union[None, str, Tuple[str, ...]]:
    """
    Collects every value found in the given fields, in the order the fields are
    provided, flattening tuple values element by element. For example, with the data
    dict:

        {
          "x": ("a","b","c"),
          "y": "d",
          "z": ("e", "f")
        }

    calling get_all_values("x", "y", "z") returns ("a", "b", "c", "d", "e", "f").

    The shape of the result depends on how many values were collected:

        - no values: None
        - exactly one value and reduce is True: the value itself
        - anything else: a tuple of the values

    :param fields: the fields to extract the values from
    :param clean: whether to remove empty strings (default: True)
    :param reduce: whether a single value is returned on its own (True, the default)
                   or kept as a 1 length tuple (False)
    :return: None, a single str, or a tuple of str, as described above
    """
    collected = tuple(self.iter_all_values(*fields, clean=clean))
    if not collected:
        return None
    if reduce and len(collected) == 1:
        return collected[0]
    return collected

get_embargo()

Get the embargo date from this record if one exists. This will only return a value for EMu records.

Returns:

Type Description
Optional[int]

the embargo date furthest in the future, or None if none is found

Source code in dataimporter/lib/model.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def get_embargo(self) -> Optional[int]:
    """
    Finds the latest embargo date on this record, if there is one. Only EMu records
    will produce a value.

    The NhmSecEmbargoDate and NhmSecEmbargoExtensionDate fields are read and each
    value is parsed using the %Y-%m-%d format. Values which fail to parse with a
    ValueError are skipped.

    :return: the embargo date furthest in the future, or None if none is found
    """
    latest = None
    candidates = self.iter_all_values(
        'NhmSecEmbargoDate', 'NhmSecEmbargoExtensionDate'
    )
    for raw in candidates:
        try:
            parsed = parse_to_timestamp(raw, '%Y-%m-%d')
            if latest is None or parsed > latest:
                latest = parsed
        except ValueError:
            # ignore values that aren't valid dates
            continue
    return latest

get_first_value(*fields, clean=True, default=None, lower=False)

Retrieves the first value present in the given fields. If the first field with a value contains a tuple rather than a str, then the first element of the tuple is returned, not the tuple itself. The fields are iterated over in the order they are provided until a value is found.

Parameters:

Name Type Description Default
fields str

the fields to extract a value from

()
clean bool

whether to remove empty strings (default: True)

True
default Any

the value to return if no values are found for the fields given (default: None)

None
lower bool

whether to lowercase the value or not before returning it

False

Returns:

Type Description
Optional[str]

the first value from the given fields, or the default

Source code in dataimporter/lib/model.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def get_first_value(
    self,
    *fields: str,
    clean: bool = True,
    default: Any = None,
    lower: bool = False,
) -> Optional[str]:
    """
    Retrieves the first value present in the given fields. If the first field with a
    value contains a tuple rather than a str, then the first element of the tuple is
    returned, not the tuple itself. The fields are iterated over in the order they
    are provided until a value is found.

    :param fields: the fields to extract a value from
    :param clean: whether to remove empty strings (default: True)
    :param default: the value to return if no values are found for the fields given
        (default: None)
    :param lower: whether to lowercase the value or not before returning it. This
        also applies to a str default; non-str values are returned untouched.
    :return: the first value from the given fields, or the default
    """
    value = next(iter(self.iter_all_values(*fields, clean=clean)), default)
    # only str values can be lowercased; default is typed Any so it may be
    # something else entirely (e.g. a number or sentinel object)
    if lower and isinstance(value, str):
        return value.lower()
    return value

iter_all_values(*fields, clean=True)

Retrieves all the values from the given fields and yields them one by one. If there are no values available, nothing is yielded. If there are values, each one is yielded on its own. This means each value yielded will be a str; tuples are yielded element by element. For example, with the data dict:

{
  "x": ("a","b","c"),
  "y": "d",
  "z": ("e", "f")
}

calling iter_all_values("x", "y", "z") yields "a", "b", "c", "d", "e", and "f".

If the clean parameter is True (the default), then empty strings are removed. If it is False then they are yielded.

Parameters:

Name Type Description Default
fields str

the fields to extract values from

()
clean bool

whether to remove empty strings (default: True)

True

Returns:

Type Description
Iterable[str]

yields str values in the provided field order

Source code in dataimporter/lib/model.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def iter_all_values(self, *fields: str, clean: bool = True) -> Iterable[str]:
    """
    Yields, one at a time, every value held in the given fields. Fields are visited
    in the order provided. A field holding a tuple contributes each of its elements
    in turn, a field holding a single value contributes just that value, and a field
    which is missing (or None) contributes nothing. For example, with the data dict:

        {
          "x": ("a","b","c"),
          "y": "d",
          "z": ("e", "f")
        }

    calling iter_all_values("x", "y", "z") yields "a", "b", "c", "d", "e", and "f".

    When clean is True (the default) empty strings are dropped; when it is False they
    are yielded like any other value.

    :param fields: the fields to extract values from
    :param clean: whether to remove empty strings (default: True)
    :return: yields str values in the provided field order
    """
    for name in fields:
        found = self.data.get(name)
        if found is None:
            continue
        # wrap a lone value so tuples and single values share one loop
        candidates = found if isinstance(found, tuple) else (found,)
        for item in candidates:
            if item or not clean:
                yield item