Analyzer Interface

analyzer_interface

column_automap

check_name_hint(name, hint)

Returns true if every word in the hint (split by spaces) is present in the name, in a case-insensitive manner.

Source code in analyzer_interface/column_automap.py (lines 52-57)
def check_name_hint(name: str, hint: str):
    """
    Returns true if every word in the hint (split by spaces) is present in the name,
    in a case insensitive manner.
    """
    return all(word.lower().strip() in name.lower() for word in hint.split(" "))
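
For illustration, a couple of hypothetical calls (the column names are made up):

from analyzer_interface.column_automap import check_name_hint

check_name_hint("Author Screen Name", "author name")  # True: both "author" and "name" appear
check_name_hint("post_url", "author name")            # False: "author" does not appear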

column_automap(user_columns, input_schema_columns)

Matches user-provided columns to the expected columns based on the name hints.

The resulting dictionary is keyed by the expected input column name.

Source code in analyzer_interface/column_automap.py (lines 12-49)
def column_automap(
    user_columns: list[UserInputColumn], input_schema_columns: list[InputColumn]
):
    """
    Matches user-provided columns to the expected columns based on the name hints.

    The resulting dictionary is keyed by the expected input column name.
    """
    matches: dict[str, str] = {}
    for input_column in input_schema_columns:
        max_score = None
        best_match_user_column = None
        for user_column in user_columns:
            current_score = get_data_type_compatibility_score(
                input_column.data_type, user_column.data_type
            )

            # Don't consider type-incompatible columns
            if current_score is None:
                continue

            # Boost the score if we have a name hint match such that
            # - among similarly compatible matches, those with name hints are preferred
            # - among name hint matches, those with the best data type compatibility are preferred
            if any(
                check_name_hint(user_column.name, hint)
                for hint in input_column.name_hints
            ):
                current_score += 10

            if max_score is None or current_score > max_score:
                max_score = current_score
                best_match_user_column = user_column

        if best_match_user_column is not None:
            matches[input_column.name] = best_match_user_column.name

    return matches
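
A minimal sketch of the automapping with made-up columns. The exact constructor fields and the module that defines UserInputColumn are assumptions here, but the resulting mapping follows the scoring functions shown on this page:

from analyzer_interface.column_automap import column_automap
from analyzer_interface.interface import InputColumn
# UserInputColumn is assumed importable from the analyzer_interface package;
# its exact module is not shown on this page.

user_columns = [
    UserInputColumn(name="tweet_id", data_type="identifier"),
    UserInputColumn(name="author screen name", data_type="text"),
]
expected_columns = [
    InputColumn(name="message_id", data_type="identifier", name_hints=["tweet id", "post id"]),
    InputColumn(name="author", data_type="text", name_hints=["author name", "screen name"]),
]

# Each expected column finds a type-compatible user column whose name matches a hint:
# {"message_id": "tweet_id", "author": "author screen name"}
print(column_automap(user_columns, expected_columns))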

context

AssetsReader

Bases: ABC

Source code in analyzer_interface/context.py (lines 116-122)
class AssetsReader(ABC):
    @abstractmethod
    def table(self, output_id: str) -> "TableReader":
        """
        Gets the table reader for the specified output.
        """
        pass
table(output_id) abstractmethod

Gets the table reader for the specified output.

Source code in analyzer_interface/context.py (lines 117-122)
@abstractmethod
def table(self, output_id: str) -> "TableReader":
    """
    Gets the table reader for the specified output.
    """
    pass

BaseDerivedModuleContext

Bases: ABC, BaseModel

Common interface for the runtime contexts of secondary analyzers and web presenters.

Source code in analyzer_interface/context.py (lines 48-84)
class BaseDerivedModuleContext(ABC, BaseModel):
    """
    Common interface for the runtime contexts of secondary analyzers and web presenters.
    """

    temp_dir: str
    """
  Gets the temporary directory that the module can freely write content to
  during its lifetime. This directory will not persist between runs.
  """

    @property
    @abstractmethod
    def base_params(self) -> dict[str, ParamValue]:
        """
        Gets the primary analysis parameters.
        """
        pass

    @property
    @abstractmethod
    def base(self) -> "AssetsReader":
        """
        Gets the base primary analyzer's context, which lets you inspect and load its
        outputs.
        """
        pass

    @abstractmethod
    def dependency(
        self, secondary_interface: SecondaryAnalyzerInterface
    ) -> "AssetsReader":
        """
        Gets the context of a secondary analyzer the current module depends on, which
        lets you inspect and load its outputs.
        """
        pass
base abstractmethod property

Gets the base primary analyzer's context, which lets you inspect and load its outputs.

base_params abstractmethod property

Gets the primary analysis parameters.

temp_dir instance-attribute

Gets the temporary directory that the module can freely write content to during its lifetime. This directory will not persist between runs.

dependency(secondary_interface) abstractmethod

Gets the context of a secondary analyzer the current module depends on, which lets you inspect and load its outputs.

Source code in analyzer_interface/context.py (lines 76-84)
@abstractmethod
def dependency(
    self, secondary_interface: SecondaryAnalyzerInterface
) -> "AssetsReader":
    """
    Gets the context of a secondary analyzer the current module depends on, which
    lets you inspect and load its outputs.
    """
    pass

FactoryOutputContext

Bases: BaseModel

Output interface for both factory and api_factory functions for web presenters.

Source code in analyzer_interface/context.py (lines 187-209)
class FactoryOutputContext(BaseModel):
    """
    Output interface for both factory and api_factory functions for web
    presenters.
    """

    shiny: Optional[ShinyContext] = None
    """
    Factory output for Shiny dashboards
    """

    api: Optional[dict[str, Any]] = None
    """
    API factory output for React dashboard REST API
    """

    data_frames: Optional[dict[str, DataFrame]] = None
    """
    API factory dataframe output for React dashboard REST API
    """

    class Config:
        arbitrary_types_allowed = True
api = None class-attribute instance-attribute

API factory output for React dashboard REST API

data_frames = None class-attribute instance-attribute

API factory dataframe output for React dashboard REST API

shiny = None class-attribute instance-attribute

Factory output for Shiny dashboards
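
A rough sketch of what a web presenter factory might return. The panel contents, the server handler body, and the summary data frame are placeholders, and treating DataFrame as a polars frame is an assumption:

import polars as pl
from shiny import ui
from analyzer_interface.context import FactoryOutputContext, ShinyContext

def server_handler(input, output, session):
    # Assumed ServerCallback signature; register reactive outputs here.
    ...

def factory(context) -> FactoryOutputContext:
    summary_df = pl.DataFrame({"metric": ["posts"], "value": [123]})  # placeholder data
    return FactoryOutputContext(
        shiny=ShinyContext(
            panel=ui.nav_panel("My Presenter", "Panel content goes here"),
            server_handler=server_handler,
        ),
        data_frames={"summary": summary_df},
    )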

InputTableReader

Bases: TableReader

Source code in analyzer_interface/context.py (lines 139-149)
class InputTableReader(TableReader):
    @abstractmethod
    def preprocess[
        PolarsDataFrameLike
    ](self, df: PolarsDataFrameLike) -> PolarsDataFrameLike:
        """
        Given the manually loaded user input dataframe, apply column mapping and
        semantic transformations to give the input dataframe that the analyzer
        expects.
        """
        pass
preprocess(df) abstractmethod

Given the manually loaded user input dataframe, apply column mapping and semantic transformations to give the input dataframe that the analyzer expects.

Source code in analyzer_interface/context.py (lines 140-149)
@abstractmethod
def preprocess[
    PolarsDataFrameLike
](self, df: PolarsDataFrameLike) -> PolarsDataFrameLike:
    """
    Given the manually loaded user input dataframe, apply column mapping and
    semantic transformations to give the input dataframe that the analyzer
    expects.
    """
    pass

PrimaryAnalyzerContext

Bases: ABC, BaseModel

Source code in analyzer_interface/context.py (lines 15-45)
class PrimaryAnalyzerContext(ABC, BaseModel):
    temp_dir: str
    """
  Gets the temporary directory that the module can freely write content to
  during its lifetime. This directory will not persist between runs.
  """

    @abstractmethod
    def input(self) -> "InputTableReader":
        """
        Gets the input reader context.

        **Note that this is in function form** even though one input is expected,
        in anticipation that we may want to support multiple inputs in the future.
        """
        pass

    @property
    @abstractmethod
    def params(self) -> dict[str, ParamValue]:
        """
        Gets the analysis parameters.
        """
        pass

    @abstractmethod
    def output(self, output_id: str) -> "TableWriter":
        """
        Gets the output writer context for the specified output ID.
        """
        pass
params abstractmethod property

Gets the analysis parameters.

temp_dir instance-attribute

Gets the temporary directory that the module can freely write content to during its lifetime. This directory will not persist between runs.

input() abstractmethod

Gets the input reader context.

Note that this is in function form even though one input is expected, in anticipation that we may want to support multiple inputs in the future.

Source code in analyzer_interface/context.py (lines 22-30)
@abstractmethod
def input(self) -> "InputTableReader":
    """
    Gets the input reader context.

    **Note that this is in function form** even though one input is expected,
    in anticipation that we may want to support multiple inputs in the future.
    """
    pass
output(output_id) abstractmethod

Gets the output writer context for the specified output ID.

Source code in analyzer_interface/context.py (lines 40-45)
@abstractmethod
def output(self, output_id: str) -> "TableWriter":
    """
    Gets the output writer context for the specified output ID.
    """
    pass
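
Putting these pieces together, a primary analyzer entry point might look like the following sketch; the column names, parameter ID, and output ID are made up:

import polars as pl

def main(context: PrimaryAnalyzerContext):
    input_reader = context.input()
    df = input_reader.preprocess(pl.read_parquet(input_reader.parquet_path))

    min_count = context.params.get("min_count", 1)  # hypothetical parameter
    result = (
        df.group_by("author_id")  # hypothetical input column
        .len()
        .filter(pl.col("len") >= min_count)
    )

    # The output ID must match one declared in the analyzer's interface.
    result.write_parquet(context.output("author_counts").parquet_path)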

SecondaryAnalyzerContext

Bases: BaseDerivedModuleContext

Source code in analyzer_interface/context.py (lines 107-113)
class SecondaryAnalyzerContext(BaseDerivedModuleContext):
    @abstractmethod
    def output(self, output_id: str) -> "TableWriter":
        """
        Gets the output writer context
        """
        pass
output(output_id) abstractmethod

Gets the output writer context

Source code in analyzer_interface/context.py (lines 108-113)
@abstractmethod
def output(self, output_id: str) -> "TableWriter":
    """
    Gets the output writer context
    """
    pass
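
A secondary analyzer reads from its base primary analyzer (and any declared secondary dependencies) and writes its own outputs; a sketch with made-up IDs and columns:

import polars as pl

def main(context: SecondaryAnalyzerContext):
    # Read an output of the base primary analyzer (output ID is hypothetical).
    base_df = pl.read_parquet(context.base.table("author_counts").parquet_path)

    top = base_df.sort("len", descending=True).head(10)  # hypothetical column
    top.write_parquet(context.output("top_authors").parquet_path)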

ShinyContext

Bases: BaseModel

Output interface for Shiny dashboards

Source code in analyzer_interface/context.py (lines 168-184)
class ShinyContext(BaseModel):
    """
    Output interface for Shiny dashboards
    """

    panel: NavPanel = None
    """
    UI navigation panel to be added to shiny dashboard
    """

    server_handler: Optional[ServerCallback] = None
    """
    Server handler callback to be called by the shiny application instance
    """

    class Config:
        arbitrary_types_allowed = True
panel = None class-attribute instance-attribute

UI navigation panel to be added to shiny dashboard

server_handler = None class-attribute instance-attribute

Server handler callback to be called by the shiny application instance

TableReader

Bases: ABC

Source code in analyzer_interface/context.py (lines 125-133)
class TableReader(ABC):
    @property
    @abstractmethod
    def parquet_path(self) -> str:
        """
        Gets the path to the table's parquet file. The module should expect a parquet
        file here.
        """
        pass
parquet_path abstractmethod property

Gets the path to the table's parquet file. The module should expect a parquet file here.

TableWriter

Bases: ABC

Source code in analyzer_interface/context.py (lines 152-160)
class TableWriter(ABC):
    @property
    @abstractmethod
    def parquet_path(self) -> str:
        """
        Gets the path to the table's parquet file. The module should write a parquet
        file to it.
        """
        pass
parquet_path abstractmethod property

Gets the path to the table's parquet file. The module should write a parquet file to it.

WebPresenterContext

Bases: BaseDerivedModuleContext

Source code in analyzer_interface/context.py (lines 87-104)
class WebPresenterContext(BaseDerivedModuleContext):
    dash_app: Dash
    """
  The Dash app that is being built.
  """

    @property
    @abstractmethod
    def state_dir(self) -> str:
        """
        Gets the directory where the web presenter can store state that persists
        between runs. This state space is unique for each
        project/primary analyzer/web presenter combination.
        """
        pass

    class Config:
        arbitrary_types_allowed = True
dash_app instance-attribute

The Dash app that is being built.

state_dir abstractmethod property

Gets the directory where the web presenter can store state that persists between runs. This state space is unique for each project/primary analyzer/web presenter combination.

data_type_compatibility

data_type_mapping_preference = {'text': [['text'], ['identifier', 'url']], 'integer': [['integer']], 'float': [['float', 'integer']], 'boolean': [['boolean']], 'datetime': [['datetime']], 'time': [['time'], ['datetime']], 'identifier': [['identifier'], ['url', 'datetime'], ['integer'], ['text']], 'url': [['url']]} module-attribute

For each data type, a list of lists of data types that are considered compatible with it. The first list is the most preferred, the last list is the least. The items in each list are considered equally compatible.

get_data_type_compatibility_score(expected_data_type, actual_data_type)

Returns a score for the compatibility of the actual data type with the expected data type. Higher (less negative) scores are better. None means the data types are not compatible.

Source code in analyzer_interface/data_type_compatibility.py (lines 20-37)
def get_data_type_compatibility_score(
    expected_data_type: DataType, actual_data_type: DataType
):
    """
    Returns a score for the compatibility of the actual data type with the
    expected data type. Higher (less negative) scores are better.
    `None` means the data types are not compatible.
    """
    if expected_data_type == actual_data_type:
        return 0

    for i, preference_list in enumerate(
        data_type_mapping_preference[expected_data_type]
    ):
        if actual_data_type in preference_list:
            return -(i + 1)

    return None
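
For example, following the preference table above:

from analyzer_interface.data_type_compatibility import get_data_type_compatibility_score

get_data_type_compatibility_score("text", "text")        # 0    (exact match)
get_data_type_compatibility_score("text", "identifier")  # -2   (second preference group for "text")
get_data_type_compatibility_score("identifier", "text")  # -4   (fourth preference group for "identifier")
get_data_type_compatibility_score("integer", "text")     # None (not compatible)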

declaration

AnalyzerDeclaration

Bases: AnalyzerInterface

Source code in analyzer_interface/declaration.py (lines 17-52)
class AnalyzerDeclaration(AnalyzerInterface):
    entry_point: Callable[[PrimaryAnalyzerContext], None]
    default_params: Callable[[PrimaryAnalyzerContext], dict[str, ParamValue]]
    is_distributed: bool

    def __init__(
        self,
        interface: AnalyzerInterface,
        main: Callable,
        *,
        is_distributed: bool = False,
        default_params: Callable[[PrimaryAnalyzerContext], dict[str, ParamValue]] = (
            lambda _: dict()
        )
    ):
        """Creates a primary analyzer declaration

        Args:
          interface (AnalyzerInterface): The metadata interface for the primary analyzer.

          main (Callable):
            The entry point function for the primary analyzer. This function should
            take a single argument of type `PrimaryAnalyzerContext` and should ensure
            that the outputs specified in the interface are generated.

          is_distributed (bool):
            Set this explicitly to `True` once the analyzer is ready to be shipped
            to end users; it will make the analyzer available in the distributed
            executable.
        """
        super().__init__(
            **interface.model_dump(),
            entry_point=main,
            default_params=default_params,
            is_distributed=is_distributed
        )
__init__(interface, main, *, is_distributed=False, default_params=lambda _: dict())

Creates a primary analyzer declaration

Parameters:

interface (AnalyzerInterface, required)
    The metadata interface for the primary analyzer.

main (Callable, required)
    The entry point function for the primary analyzer. This function should take a single argument of type PrimaryAnalyzerContext and should ensure that the outputs specified in the interface are generated.

is_distributed (bool, default False)
    Set this explicitly to True once the analyzer is ready to be shipped to end users; it will make the analyzer available in the distributed executable.
Source code in analyzer_interface/declaration.py (lines 22-52)
def __init__(
    self,
    interface: AnalyzerInterface,
    main: Callable,
    *,
    is_distributed: bool = False,
    default_params: Callable[[PrimaryAnalyzerContext], dict[str, ParamValue]] = (
        lambda _: dict()
    )
):
    """Creates a primary analyzer declaration

    Args:
      interface (AnalyzerInterface): The metadata interface for the primary analyzer.

      main (Callable):
        The entry point function for the primary analyzer. This function should
        take a single argument of type `PrimaryAnalyzerContext` and should ensure
        that the outputs specified in the interface are generated.

      is_distributed (bool):
        Set this explicitly to `True` once the analyzer is ready to be shipped
        to end users; it will make the analyzer available in the distributed
        executable.
    """
    super().__init__(
        **interface.model_dump(),
        entry_point=main,
        default_params=default_params,
        is_distributed=is_distributed
    )
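
A sketch of a declaration; `interface` is assumed to be an AnalyzerInterface built elsewhere, and `main` is an entry point like the PrimaryAnalyzerContext sketch earlier on this page:

from analyzer_interface.declaration import AnalyzerDeclaration

analyzer = AnalyzerDeclaration(
    interface=interface,                               # AnalyzerInterface defined elsewhere
    main=main,                                         # entry point taking a PrimaryAnalyzerContext
    default_params=lambda _context: {"min_count": 5},  # hypothetical parameter default
    is_distributed=False,                              # flip to True when ready to ship
)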

SecondaryAnalyzerDeclaration

Bases: SecondaryAnalyzerInterface

Source code in analyzer_interface/declaration.py (lines 55-69)
class SecondaryAnalyzerDeclaration(SecondaryAnalyzerInterface):
    entry_point: Callable[["SecondaryAnalyzerContext"], None]

    def __init__(self, interface: SecondaryAnalyzerInterface, main: Callable):
        """Creates a secondary analyzer declaration

        Args:
          interface (SecondaryAnalyzerInterface): The metadata interface for the secondary analyzer.

          main (Callable):
            The entry point function for the secondary analyzer. This function should
            take a single argument of type `SecondaryAnalyzerContext` and should ensure
            that the outputs specified in the interface are generated.
        """
        super().__init__(**interface.model_dump(), entry_point=main)
__init__(interface, main)

Creates a secondary analyzer declaration

Parameters:

interface (SecondaryAnalyzerInterface, required)
    The metadata interface for the secondary analyzer.

main (Callable, required)
    The entry point function for the secondary analyzer. This function should take a single argument of type SecondaryAnalyzerContext and should ensure that the outputs specified in the interface are generated.
Source code in analyzer_interface/declaration.py (lines 58-69)
def __init__(self, interface: SecondaryAnalyzerInterface, main: Callable):
    """Creates a secondary analyzer declaration

    Args:
      interface (SecondaryAnalyzerInterface): The metadata interface for the secondary analyzer.

      main (Callable):
        The entry point function for the secondary analyzer. This function should
        take a single argument of type `SecondaryAnalyzerContext` and should ensure
        that the outputs specified in the interface are generated.
    """
    super().__init__(**interface.model_dump(), entry_point=main)
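
Declaring a secondary analyzer follows the same pattern; `secondary_interface` is assumed to be a SecondaryAnalyzerInterface defined elsewhere:

from analyzer_interface.declaration import SecondaryAnalyzerDeclaration

secondary = SecondaryAnalyzerDeclaration(
    interface=secondary_interface,  # SecondaryAnalyzerInterface defined elsewhere
    main=main,                      # entry point taking a SecondaryAnalyzerContext
)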

WebPresenterDeclaration

Bases: WebPresenterInterface

Source code in analyzer_interface/declaration.py (lines 72-111)
class WebPresenterDeclaration(WebPresenterInterface):
    factory: Callable[["WebPresenterContext"], Union[FactoryOutputContext, None]]
    shiny: bool
    server_name: str

    def __init__(
        self,
        interface: WebPresenterInterface,
        factory: Callable,
        name: str,
        shiny: bool,
    ):
        """Creates a web presenter declaration

        Args:
          interface (WebPresenterInterface): The metadata interface for the web presenter.

          factory (Callable):
            The factory function that creates a Dash app for the web presenter. It should
            modify the Dash app in the context to add whatever plotting interface
            the web presenter needs.

          server_name (str):
            The server name for the Dash app. Typically, you will use the global
            variable `__name__` here.

            If your web presenter has assets like images, CSS or JavaScript files,
            you can put them in a folder named `assets` in the same directory
            as the file where `__name__` is used. The Dash app will serve these
            files at the `/assets/` URL, using the python module name in `__name__`
            to determine the absolute path to the assets folder.

            See Dash documentation for more details: https://dash.plotly.com
            See also Python documentation for the `__name__` variable:
            https://docs.python.org/3/tutorial/modules.html

        """
        super().__init__(
            **interface.model_dump(), factory=factory, server_name=name, shiny=shiny
        )
__init__(interface, factory, name, shiny)

Creates a web presenter declaration

Parameters:

interface (WebPresenterInterface, required)
    The metadata interface for the web presenter.

factory (Callable, required)
    The factory function that creates a Dash app for the web presenter. It should modify the Dash app in the context to add whatever plotting interface the web presenter needs.

server_name (str, required)
    The server name for the Dash app. Typically, you will use the global variable __name__ here.

    If your web presenter has assets like images, CSS or JavaScript files, you can put them in a folder named assets in the same directory as the file where __name__ is used. The Dash app will serve these files at the /assets/ URL, using the Python module name in __name__ to determine the absolute path to the assets folder.

    See the Dash documentation for more details: https://dash.plotly.com
    See also the Python documentation for the __name__ variable: https://docs.python.org/3/tutorial/modules.html
Source code in analyzer_interface/declaration.py (lines 77-111)
def __init__(
    self,
    interface: WebPresenterInterface,
    factory: Callable,
    name: str,
    shiny: bool,
):
    """Creates a web presenter declaration

    Args:
      interface (WebPresenterInterface): The metadata interface for the web presenter.

      factory (Callable):
        The factory function that creates a Dash app for the web presenter. It should
        modify the Dash app in the context to add whatever plotting interface
        the web presenter needs.

      server_name (str):
        The server name for the Dash app. Typically, you will use the global
        variable `__name__` here.

        If your web presenter has assets like images, CSS or JavaScript files,
        you can put them in a folder named `assets` in the same directory
        as the file where `__name__` is used. The Dash app will serve these
        files at the `/assets/` URL, using the python module name in `__name__`
        to determine the absolute path to the assets folder.

        See Dash documentation for more details: https://dash.plotly.com
        See also Python documentation for the `__name__` variable:
        https://docs.python.org/3/tutorial/modules.html

    """
    super().__init__(
        **interface.model_dump(), factory=factory, server_name=name, shiny=shiny
    )
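
A sketch of a web presenter declaration; `web_interface` is assumed to be a WebPresenterInterface defined elsewhere, and the Dash layout is a placeholder:

from dash import html
from analyzer_interface.declaration import WebPresenterDeclaration

def factory(context):
    # Add a placeholder layout to the Dash app being built.
    context.dash_app.layout = html.Div("Hello from the presenter")

presenter = WebPresenterDeclaration(
    interface=web_interface,  # WebPresenterInterface defined elsewhere
    factory=factory,
    name=__name__,            # stored as server_name; an assets/ folder next to this file is served at /assets/
    shiny=False,
)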

interface

DataType = Literal['text', 'integer', 'float', 'boolean', 'datetime', 'identifier', 'url', 'time'] module-attribute

The semantic data type for a data column. This is not quite the same as structural data types like polars or pandas or even arrow types, but they represent how the data is intended to be interpreted.

  • text is expected to be free-form, human-readable text content.
  • integer and float are meant to be manipulated arithmetically.
  • boolean is a binary value.
  • datetime represents time and is meant to be manipulated as a time value.
  • time represents time within a day, not including the date information.
  • identifier is a unique identifier for a record. It is not expected to be manipulated in any way.
  • url is a string that represents a URL.

AnalyzerInterface

Bases: BaseAnalyzerInterface

Source code in analyzer_interface/interface.py (lines 122-138)
class AnalyzerInterface(BaseAnalyzerInterface):
    input: AnalyzerInput
    """
  Specifies the input data schema for the analyzer.
  """

    params: list[AnalyzerParam] = []
    """
  A list of parameters that the analyzer accepts.
  """

    outputs: list["AnalyzerOutput"]
    """
  Specifies the output data schema for the analyzer.
  """

    kind: Literal["primary"] = "primary"
input instance-attribute

Specifies the input data schema for the analyzer.

outputs instance-attribute

Specifies the output data schema for the analyzer.

params = [] class-attribute instance-attribute

A list of parameters that the analyzer accepts.

AnalyzerOutput

Bases: BaseModel

Source code in analyzer_interface/interface.py (lines 85-119)
class AnalyzerOutput(BaseModel):
    id: str
    """
  Uniquely identifies the output data schema for the analyzer. The analyzer
  must include this key in the output dictionary.
  """

    name: str
    """The human-friendly for the output."""

    description: Optional[str] = None

    columns: list["OutputColumn"]

    internal: bool = False

    def get_column_by_name(self, name: str):
        for column in self.columns:
            if column.name == name:
                return column
        return None

    def transform_output(self, output_df: pl.LazyFrame | pl.DataFrame):
        output_columns = output_df.lazy().collect_schema().names()
        return output_df.select(
            [
                pl.col(col_name).alias(
                    output_spec.human_readable_name_or_fallback()
                    if output_spec
                    else col_name
                )
                for col_name in output_columns
                if (output_spec := self.get_column_by_name(col_name)) or True
            ]
        )
id instance-attribute

Uniquely identifies the output data schema for the analyzer. The analyzer must include this key in the output dictionary.

name instance-attribute

The human-friendly name for the output.
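
For illustration, transform_output renames known columns to their human-readable names; the exact OutputColumn constructor fields used below are assumptions based on the methods referenced above:

import polars as pl

# Hypothetical output spec; OutputColumn fields are assumed.
output = AnalyzerOutput(
    id="author_counts",
    name="Author counts",
    columns=[
        OutputColumn(name="author_id", data_type="identifier", human_readable_name="Author ID"),
        OutputColumn(name="len", data_type="integer", human_readable_name="Post count"),
    ],
)

df = pl.DataFrame({"author_id": ["a", "b"], "len": [3, 1]})
print(output.transform_output(df))  # columns become "Author ID" and "Post count"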

AnalyzerParam

Bases: BaseModel

Source code in analyzer_interface/interface.py (lines 42-82)
class AnalyzerParam(BaseModel):
    id: str
    """
    The name of the parameter. This becomes the key in the parameters dictionary
    that is passed to the analyzer.
    """

    human_readable_name: Optional[str] = None
    """
    The human-friendly name for the parameter. This is used in the UI to
    represent the parameter.
    """

    description: Optional[str] = None
    """
    A short description of the parameter. This is used in the UI to represent
    the parameter.
    """

    type: ParamType
    """
    The type of the parameter. This is used for validation and for customizing
    the UX for parameter input.
    """

    default: Optional[ParamValue] = None
    """
    Optional: define a static default value for this parameter. A parameter
    without a default will need to be chosen explicitly by the user.
    """

    backfill_value: Optional[ParamValue] = None
    """
    Recommended if this is a parameter that is newly introduced in a previously
    released analyzer. The backfill shows what this parameter was before it
    became customizable.
    """

    @property
    def print_name(self):
        return self.human_readable_name or self.id
backfill_value = None class-attribute instance-attribute

Recommended if this is a parameter that is newly introduced in a previously released analyzer. The backfill shows what this parameter was before it became customizable.

default = None class-attribute instance-attribute

Optional: define a static default value for this parameter. A parameter without a default will need to be chosen explicitly by the user.

description = None class-attribute instance-attribute

A short description of the parameter. This is used in the UI to represent the parameter.

human_readable_name = None class-attribute instance-attribute

The human-friendly name for the parameter. This is used in the UI to represent the parameter.

id instance-attribute

The name of the parameter. This becomes the key in the parameters dictionary that is passed to the analyzer.

type instance-attribute

The type of the parameter. This is used for validation and for customizing the UX for parameter input.
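
A hypothetical parameter declaration using the param types documented later on this page:

from analyzer_interface.interface import AnalyzerParam
from analyzer_interface.params import IntegerParam

min_count_param = AnalyzerParam(
    id="min_count",
    human_readable_name="Minimum post count",
    description="Authors with fewer posts than this are excluded.",
    type=IntegerParam(min=1, max=1000),
    default=5,
)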

BaseAnalyzerInterface

Bases: BaseModel

Source code in analyzer_interface/interface.py (lines 9-35)
class BaseAnalyzerInterface(BaseModel):
    id: str
    """
  The static ID for the analyzer that, with the version, uniquely identifies the
  analyzer and will be stored as metadata as part of the output data.
  """

    version: str
    """
  The version ID for the analyzer. In future, we may choose to support output
  migration between versions of the same analyzer.
  """

    name: str
    """
  The short human-readable name of the analyzer.
  """

    short_description: str
    """
  A short, one-liner description of what the analyzer does.
  """

    long_description: Optional[str] = None
    """
  A longer description of what the analyzer does that will be shown separately.
  """
id instance-attribute

The static ID for the analyzer that, with the version, uniquely identifies the analyzer and will be stored as metadata as part of the output data.

long_description = None class-attribute instance-attribute

A longer description of what the analyzer does that will be shown separately.

name instance-attribute

The short human-readable name of the analyzer.

short_description instance-attribute

A short, one-liner description of what the analyzer does.

version instance-attribute

The version ID for the analyzer. In future, we may choose to support output migration between versions of the same analyzer.

DerivedAnalyzerInterface

Bases: BaseAnalyzerInterface

Source code in analyzer_interface/interface.py (lines 141-154)
class DerivedAnalyzerInterface(BaseAnalyzerInterface):
    base_analyzer: AnalyzerInterface
    """
  The base analyzer that this secondary analyzer extends. This is always a primary
  analyzer. If your module depends on other secondary analyzers (which must have
  the same base analyzer), you can specify them in the `depends_on` field.
  """

    depends_on: list["SecondaryAnalyzerInterface"] = []
    """
  A list of secondary analyzers that must be run before this secondary analyzer
  is run. These secondary analyzers must have the same primary base.
  """
base_analyzer instance-attribute

The base analyzer that this secondary analyzer extends. This is always a primary analyzer. If your module depends on other secondary analyzers (which must have the same base analyzer), you can specify them in the depends_on field.

depends_on = [] class-attribute instance-attribute

A list of secondary analyzers that must be run before this secondary analyzer is run. These secondary analyzers must have the same primary base.

InputColumn

Bases: Column

Source code in analyzer_interface/interface.py (lines 198-207)
class InputColumn(Column):
    name_hints: list[str] = []
    """
  Specifies a list of space-separated words that are likely to be found in the
  column name of the user-provided data. This is used to help the user map the
  input columns to the expected columns.

  Any individual hint matching is sufficient for a match to be called. The hint
  in turn is matched if every word matches some part of the column name.
  """
name_hints = [] class-attribute instance-attribute

Specifies a list of space-separated words that are likely to be found in the column name of the user-provided data. This is used to help the user map the input columns to the expected columns.

Any individual hint matching is sufficient for a match to be called. The hint in turn is matched if every word matches some part of the column name.
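
For example, a hypothetical expected column; any one of these hints matching the user's column name is enough for the automapper to call it a match:

from analyzer_interface.interface import InputColumn

author_column = InputColumn(
    name="author",
    data_type="identifier",
    name_hints=["author id", "user id", "screen name"],
)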

SecondaryAnalyzerInterface

Bases: DerivedAnalyzerInterface

Source code in analyzer_interface/interface.py (lines 157-163)
class SecondaryAnalyzerInterface(DerivedAnalyzerInterface):
    outputs: list[AnalyzerOutput]
    """
  Specifies the output data schema for the analyzer.
  """

    kind: Literal["secondary"] = "secondary"
outputs instance-attribute

Specifies the output data schema for the analyzer.

params

IntegerParam

Bases: BaseModel

Represents an integer value

The corresponding value will be of type int.

Source code in analyzer_interface/params.py (lines 16-25)
class IntegerParam(BaseModel):
    """
    Represents an integer value

    The corresponding value will be of type `int`.
    """

    type: Literal["integer"] = "integer"
    min: int
    max: int

TimeBinningParam

Bases: BaseModel

Represents a time bin.

The corresponding value will be of type TimeBinningValue.

Source code in analyzer_interface/params.py (lines 28-35)
class TimeBinningParam(BaseModel):
    """
    Represents a time bin.

    The corresponding value will be of type `TimeBinningValue`.
    """

    type: Literal["time_binning"] = "time_binning"

TimeBinningValue

Bases: BaseModel

Source code in analyzer_interface/params.py (lines 38-87)
class TimeBinningValue(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)

    unit: TimeBinningUnit
    amount: int

    def to_polars_truncate_spec(self) -> str:
        """
        Converts the value to a string that can be used in Polars truncate spec.
        See https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html
        """
        amount = self.amount
        unit = self.unit
        if unit == "year":
            return f"{amount}y"
        if unit == "month":
            return f"{amount}mo"
        if unit == "week":
            return f"{amount}w"
        if unit == "day":
            return f"{amount}d"
        if unit == "hour":
            return f"{amount}h"
        if unit == "minute":
            return f"{amount}m"
        if unit == "second":
            return f"{amount}s"

        raise ValueError("Invalid time binning value")

    def to_human_readable_text(self) -> str:
        amount = self.amount
        unit = self.unit

        if unit == "year":
            return f"{amount} year{'s' if amount > 1 else ''}"
        if unit == "month":
            return f"{amount} month{'s' if amount > 1 else ''}"
        if unit == "week":
            return f"{amount} week{'s' if amount > 1 else ''}"
        if unit == "day":
            return f"{amount} day{'s' if amount > 1 else ''}"
        if unit == "hour":
            return f"{amount} hour{'s' if amount > 1 else ''}"
        if unit == "minute":
            return f"{amount} minute{'s' if amount > 1 else ''}"
        if unit == "second":
            return f"{amount} second{'s' if amount > 1 else ''}"

        raise ValueError("Invalid time binning value")
to_polars_truncate_spec()

Converts the value to a string that can be used in Polars truncate spec. See https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html

Source code in analyzer_interface/params.py (lines 44-66)
def to_polars_truncate_spec(self) -> str:
    """
    Converts the value to a string that can be used in Polars truncate spec.
    See https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html
    """
    amount = self.amount
    unit = self.unit
    if unit == "year":
        return f"{amount}y"
    if unit == "month":
        return f"{amount}mo"
    if unit == "week":
        return f"{amount}w"
    if unit == "day":
        return f"{amount}d"
    if unit == "hour":
        return f"{amount}h"
    if unit == "minute":
        return f"{amount}m"
    if unit == "second":
        return f"{amount}s"

    raise ValueError("Invalid time binning value")
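
For illustration, a six-hour bin and its use with the polars truncate expression (the DataFrame is made up):

from datetime import datetime
import polars as pl
from analyzer_interface.params import TimeBinningValue

bin_value = TimeBinningValue(unit="hour", amount=6)
bin_value.to_polars_truncate_spec()  # "6h"
bin_value.to_human_readable_text()   # "6 hours"

df = pl.DataFrame({"ts": [datetime(2024, 1, 1, 3), datetime(2024, 1, 1, 9)]})
df = df.with_columns(pl.col("ts").dt.truncate(bin_value.to_polars_truncate_spec()))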