Skip to content

pad_sequence

PadSequence

Bases: NumpyOp

Pad sequences to the same length with the provided value. Sequences longer than `max_len` are truncated.

Parameters:

Name Type Description Default
inputs Union[str, Iterable[str]]

Key(s) of sequences to be padded.

required
outputs Union[str, Iterable[str]]

Key(s) of sequences that are padded.

required
max_len int

Target length of every output sequence: shorter sequences are padded up to this length and longer ones are truncated to it.

required
value Union[str, int]

Padding value.

0
append bool

Pad before or after the sequences. True for padding the values after the sequence, False otherwise.

True
mode Union[None, str, Iterable[str]]

What mode(s) to execute this Op in. For example, "train", "eval", "test", or "infer". To execute regardless of mode, pass None. To execute in all modes except for a particular one, you can pass an argument like "!infer" or "!train".

None
ds_id Union[None, str, Iterable[str]]

What dataset id(s) to execute this Op in. To execute regardless of ds_id, pass None. To execute in all ds_ids except for a particular one, you can pass an argument like "!ds1".

None
Source code in fastestimator/fastestimator/op/numpyop/univariate/pad_sequence.py
@traceable()
class PadSequence(NumpyOp):
    """Pad or truncate sequences along their first axis so that each has length `max_len`.

    Sequences shorter than `max_len` are padded with `value`; sequences that are longer are truncated to their first
    `max_len` entries. Multi-dimensional sequences (e.g. shape `(seq_len, features)`) are padded along axis 0 only, so
    their trailing dimensions are preserved.

    Args:
        inputs: Key(s) of sequences to be padded.
        outputs: Key(s) of sequences that are padded.
        max_len: Target length of every output sequence.
        value: Padding value.
        append: Pad before or after the sequences. True for padding the values after the sequence, False otherwise.
        mode: What mode(s) to execute this Op in. For example, "train", "eval", "test", or "infer". To execute
            regardless of mode, pass None. To execute in all modes except for a particular one, you can pass an argument
            like "!infer" or "!train".
        ds_id: What dataset id(s) to execute this Op in. To execute regardless of ds_id, pass None. To execute in all
            ds_ids except for a particular one, you can pass an argument like "!ds1".
    """
    def __init__(self,
                 inputs: Union[str, Iterable[str]],
                 outputs: Union[str, Iterable[str]],
                 max_len: int,
                 value: Union[str, int] = 0,
                 append: bool = True,
                 mode: Union[None, str, Iterable[str]] = None,
                 ds_id: Union[None, str, Iterable[str]] = None) -> None:
        super().__init__(inputs=inputs, outputs=outputs, mode=mode, ds_id=ds_id)
        # NOTE(review): presumably tells the base Op to always hand `forward` a list and to expect a list back, even
        # when a single key is configured - confirm against NumpyOp.
        self.in_list, self.out_list = True, True
        self.max_len = max_len
        self.value = value
        self.append = append

    def forward(self, data: List[np.ndarray], state: Dict[str, Any]) -> List[np.ndarray]:
        """Pad or truncate every input sequence independently.

        Args:
            data: The sequences to process, one per input key.
            state: Unused by this Op.

        Returns:
            The processed sequences, in the same order as `data`.
        """
        return [self._pad_sequence(elem) for elem in data]

    def _pad_sequence(self, data: np.ndarray) -> np.ndarray:
        """Pad the input sequence to the maximum length. Sequences longer than `max_len` are truncated.

        Padding is applied along the first axis only. The dtype of a padded result follows NumPy's promotion rules
        between the sequence and the padding value.

        Args:
            data: input sequence in the data.

        Returns:
            Padded (or truncated) sequence whose first dimension is `max_len`.
        """
        pad_len = self.max_len - len(data)
        if pad_len <= 0:
            # Already long enough: keep only the leading `max_len` entries.
            return data[:self.max_len]
        seq = np.asanyarray(data)
        # Give the padding block the same trailing shape as the sequence so the two can be joined along axis 0.
        # `np.append` without an axis would flatten multi-dimensional sequences and break the length guarantee.
        pad_arr = np.full((pad_len, *seq.shape[1:]), self.value)
        parts = (seq, pad_arr) if self.append else (pad_arr, seq)
        return np.concatenate(parts, axis=0)