Skip to content

Commit 6892d16

Browse files
committed
NeTEx: meilleure sérialisation des résultats
- [x] Ajoute une colonne multi_validation.binary_result (bytea). - [x] Script de backfill de cette colonne. - [ ] Remplit le binary_result à la validation. - [x] Utilise le digest et le binary_result et n’utilise plus le result dans la page de détails d’une ressource NeTEx. - [ ] Utilise le digest et le binary_result et n’utilise plus le result dans la page de validation à la demande NeTEx. - [ ] Cache du binary_result dans un GenServer pour accélérer la pagination.
1 parent 117106c commit 6892d16

File tree

10 files changed

+591
-20
lines changed

10 files changed

+591
-20
lines changed

apps/transport/lib/db/multi_validation.ex

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,15 @@ defmodule DB.MultiValidation do
3131
timestamps(type: :utc_datetime_usec)
3232

3333
field(:digest, :map)
34+
field(:binary_result, :binary, load_in_query: false)
3435
end
3536

3637
def base_query(opts \\ []) do
3738
include_result = Keyword.get(opts, :include_result, false)
3839

3940
if include_result do
4041
from(mv in DB.MultiValidation, as: :multi_validation)
41-
|> select_merge([mv], %{result: mv.result})
42+
|> select_merge([mv], %{result: mv.result, binary_result: mv.binary_result})
4243
else
4344
from(mv in DB.MultiValidation, as: :multi_validation)
4445
end

apps/transport/lib/transport_web/controllers/resource_controller.ex

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ defmodule TransportWeb.ResourceController do
166166
end
167167

168168
defp render_gtfs_details(conn, params, resource, validation) do
169-
validation_details = {_, _, _, _, issues} = build_gtfs_validation_details(validation, params)
169+
config = make_pagination_config(params)
170+
171+
{validation_details, issues} = build_gtfs_validation_details(validation, params)
170172

171173
issue_type =
172174
case params["issue_type"] do
@@ -175,65 +177,92 @@ defmodule TransportWeb.ResourceController do
175177
end
176178

177179
conn
178-
|> assign_base_resource_details(params, resource, validation_details)
180+
|> assign_base_resource_details(resource, validation_details)
181+
|> assign(:issues, Scrivener.paginate(issues, config))
179182
|> assign(:validator, Transport.Validators.GTFSTransport)
180183
|> assign(:data_vis, encoded_data_vis(issue_type, validation))
181184
|> render("gtfs_details.html")
182185
end
183186

184-
defp build_gtfs_validation_details(nil, _params), do: {nil, nil, nil, [], []}
187+
defp build_gtfs_validation_details(nil, _params), do: {{nil, nil, nil, []}, []}
185188

186189
defp build_gtfs_validation_details(%{result: validation_result, metadata: metadata = %DB.ResourceMetadata{}}, params) do
187190
summary = Transport.Validators.GTFSTransport.summary(validation_result)
188191
stats = Transport.Validators.GTFSTransport.count_by_severity(validation_result)
189192
issues = Transport.Validators.GTFSTransport.get_issues(validation_result, params)
190193

191-
{summary, stats, metadata.metadata, metadata.modes, issues}
194+
{{summary, stats, metadata.metadata, metadata.modes}, issues}
192195
end
193196

194197
defp render_netex_details(conn, params, resource, validation) do
195-
{results_adapter, validation_details, errors_template, max_severity} =
198+
config = make_pagination_config(params)
199+
200+
{results_adapter, validation_details, issues, errors_template, max_severity} =
196201
build_netex_validation_details(validation, params)
197202

198203
conn
199-
|> assign_base_resource_details(params, resource, validation_details)
204+
|> assign_base_resource_details(resource, validation_details)
205+
|> assign(:issues, fake_paginate(issues, config))
200206
|> assign(:errors_template, errors_template)
201207
|> assign(:results_adapter, results_adapter)
202208
|> assign(:max_severity, max_severity)
203209
|> assign(:data_vis, nil)
204210
|> render("netex_details.html")
205211
end
206212

207-
defp build_netex_validation_details(nil, _params), do: {nil, {nil, nil, nil, [], []}, nil, nil}
213+
# Wraps an already-sliced page of issues into a `Scrivener.Page`.
#
# Takes a `{total_entries, issues}` tuple, where `issues` holds only the
# entries of the requested page, and the pagination config providing
# `page_number` and `page_size`. The number of pages is the ceiling of
# `total_entries / page_size`.
defp fake_paginate({total_entries, issues}, config) do
  %{page_number: page_number, page_size: page_size} = config

  full_pages = div(total_entries, page_size)
  # One extra page holds the remainder when entries do not fill whole pages.
  partial_page = if rem(total_entries, page_size) > 0, do: 1, else: 0

  %Scrivener.Page{
    entries: issues,
    page_number: page_number,
    page_size: page_size,
    total_entries: total_entries,
    total_pages: full_pages + partial_page
  }
end
231+
232+
defp build_netex_validation_details(nil, _params), do: {nil, {nil, nil, nil, []}, {0, []}, nil, nil}
208233

209234
defp build_netex_validation_details(
210-
%{validator_version: version, result: validation_result, metadata: metadata = %DB.ResourceMetadata{}},
235+
%{
236+
validator_version: version,
237+
digest: digest,
238+
binary_result: binary_result,
239+
metadata: metadata = %DB.ResourceMetadata{}
240+
},
211241
params
212242
) do
213243
results_adapter = Transport.Validators.NeTEx.ResultsAdapter.resolve(version)
214-
summary = results_adapter.summary(validation_result)
215-
stats = results_adapter.count_by_severity(validation_result)
216-
issues = results_adapter.get_issues(validation_result, params)
244+
summary = digest["summary"]
245+
stats = digest["stats"]
217246
errors_template = pick_netex_errors_template(version)
218-
max_severity = results_adapter.count_max_severity(validation_result)
247+
max_severity = digest["max_severity"]
248+
249+
pagination_config = make_pagination_config(params)
250+
issues = results_adapter.get_issues(binary_result, params, pagination_config)
219251

220-
{results_adapter, {summary, stats, metadata.metadata, metadata.modes, issues}, errors_template, max_severity}
252+
{results_adapter, {summary, stats, metadata.metadata, metadata.modes}, issues, errors_template, max_severity}
221253
end
222254

223255
defp pick_netex_errors_template("0.2.1"), do: "_netex_validation_errors_v0_2_x.html"
224256
defp pick_netex_errors_template("0.2.0"), do: "_netex_validation_errors_v0_2_x.html"
225257
defp pick_netex_errors_template(_), do: "_netex_validation_errors_v0_1_0.html"
226258

227-
defp assign_base_resource_details(conn, params, resource, validation_details) do
228-
config = make_pagination_config(params)
229-
230-
{validation_summary, severities_count, metadata, modes, issues} = validation_details
259+
defp assign_base_resource_details(conn, resource, validation_details) do
260+
{validation_summary, severities_count, metadata, modes} = validation_details
231261

232262
conn
233263
|> assign(:related_files, Resource.get_related_files(resource))
234264
|> assign(:resource, resource)
235265
|> assign(:other_resources, Resource.other_resources(resource))
236-
|> assign(:issues, Scrivener.paginate(issues, config))
237266
|> assign(:validation_summary, validation_summary)
238267
|> assign(:severities_count, severities_count)
239268
|> assign(:metadata, metadata)

apps/transport/lib/validators/netex/results_adapter.ex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ defmodule Transport.Validators.NeTEx.ResultsAdapter do
1212
@callback count_max_severity(map()) :: {binary(), integer()}
1313
@callback no_error?(binary()) :: boolean()
1414
@callback french_profile_compliance_check() :: :none | :partial | :good_enough
15+
@callback to_dataframe(list()) :: Explorer.DataFrame.t()
16+
# Serialises a raw validation result into the binary stored in
# `multi_validation.binary_result`. The result is a map (the V0_1_0
# implementation calls `Map.values/1` on it), hence `map()` rather than `list()`.
@callback to_binary_result(map()) :: binary()
1517

1618
def resolve("0.2.1"), do: Transport.Validators.NeTEx.ResultsAdapters.V0_2_1
1719
def resolve("0.2.0"), do: Transport.Validators.NeTEx.ResultsAdapters.V0_2_0
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
defmodule Transport.Validators.NeTEx.ResultsAdapters.Commons do
  @moduledoc """
  Helpers shared by the NeTEx results adapters: they turn validation errors
  into an `Explorer.DataFrame`, dump/load that dataframe as a Parquet binary
  and read pages of issues back from it.

  Code in this module is meant to be moved to Transport.Validators.NeTEx.ResultsAdapters.v***,
  maybe as a helper module to be shared among versions.

  We will also need it for backfilling purposes.
  """
  require Explorer.DataFrame, as: DF
  alias Explorer.Series, as: S

  # Column types enforced when building the dataframe.
  #
  # Every resource attribute produced by `build_resource_attributes/1` has an
  # entry here, `resource.id` included (a keyword list must not repeat a key).
  #
  # Line and column numbers use 32-bit unsigned integers: 8/16-bit ones cap at
  # 255/65_535, which is less than what large XML files can report.
  @dtypes [
    category: :category,
    code: :category,
    criticity: :category,
    message: :string,
    "resource.class": :category,
    "resource.id": :string,
    "resource.column": {:u, 32},
    "resource.filename": :category,
    "resource.line": {:u, 32}
  ]

  @doc """
  Builds a dataframe with one row per validation error.

  Each row holds the mandatory attributes (`code`, `criticity`, `message`,
  with defaults when absent), the flattened `resource.*` attributes (`nil`
  when absent) and whatever `extra_attributes_fun` returns when called with
  the mandatory attributes of the error.
  """
  def to_dataframe(errors, extra_attributes_fun) do
    errors
    |> Enum.map(&project_error(&1, extra_attributes_fun))
    |> DF.new(dtypes: @dtypes)
  end

  # Flattens a single error into the row inserted in the dataframe.
  defp project_error(entry, extra_attributes_fun) do
    mandatory_attributes = build_mandatory_attributes(entry)

    resource = Map.get(entry, "resource", %{})

    mandatory_attributes
    |> Map.merge(build_resource_attributes(resource))
    |> Map.merge(extra_attributes_fun.(mandatory_attributes))
  end

  # Attributes every row must have; defaults apply when the error lacks the key.
  defp build_mandatory_attributes(entry) do
    %{
      "code" => "unknown-code",
      "criticity" => "error",
      "message" => "Unknown error"
    }
    |> build_with_default_attributes(entry)
  end

  # Resource attributes, stored under "resource."-prefixed keys, `nil` when absent.
  defp build_resource_attributes(resource) do
    %{
      "resource.id" => nil,
      "resource.line" => nil,
      "resource.class" => nil,
      "resource.column" => nil,
      "resource.filename" => nil
    }
    |> build_with_default_attributes(prefix_keys(resource, "resource."))
  end

  defp prefix_keys(map, prefix) do
    rename_keys(map, fn key -> "#{prefix}#{key}" end)
  end

  defp rename_keys(map, fun) do
    Map.new(map, fn {key, value} -> {fun.(key), value} end)
  end

  # Keeps only the keys known to `defaults`, taking the value from `attributes`
  # when it provides one.
  defp build_with_default_attributes(defaults, attributes) do
    Map.merge(defaults, Map.intersect(defaults, attributes))
  end

  @doc """
  Returns the rows of the given page whose `category` column equals `category`.

  `page` is a page number; the page size comes from the default pagination
  configuration. Only the `code`, `message`, `resource.filename` and
  `resource.line` columns are returned.
  """
  def search(df, page, category) do
    df
    |> DF.filter(category == ^category)
    |> DF.slice(page(page))
    |> DF.select(["code", "message", "resource.filename", "resource.line"])
    |> DF.to_rows()
  end

  @doc """
  Returns the rows of the page described by the pagination `config`, limited
  to the columns needed to display an issue.
  """
  def slice(df, %Scrivener.Config{} = config) do
    df
    |> DF.slice(page(config))
    |> DF.select(["code", "criticity", "message", "resource.filename", "resource.line"])
    |> DF.to_rows()
  end

  # Row range of a page, from a pagination config or from a bare page number
  # (in which case the default page size applies).
  defp page(%Scrivener.Config{page_number: page_number, page_size: page_size}) do
    page_range(page_number, page_size)
  end

  defp page(page_number) do
    %Scrivener.Config{page_size: page_size} = TransportWeb.PaginationHelpers.make_pagination_config(%{})
    page_range(page_number, page_size)
  end

  # Zero-based, inclusive range of row indexes for a 1-based page number.
  defp page_range(page_number, page_size) do
    first = (page_number - 1) * page_size
    last = page_number * page_size - 1
    Range.new(first, last)
  end

  @doc """
  Dumps the dataframe as a brotli-compressed Parquet binary.
  """
  def to_binary(%Explorer.DataFrame{} = df) do
    DF.dump_parquet!(df, compression: :brotli)
  end

  @doc """
  Loads a dataframe from a Parquet binary, as produced by `to_binary/1`.
  """
  def from_binary(binary) when is_binary(binary) do
    DF.load_parquet!(binary)
  end

  @doc """
  Converts flat dataframe rows back to nested issues, see `slice/2`.
  """
  def to_issues(entries), do: Enum.map(entries, &to_issue/1)

  # Rebuilds the nested "resource" map; `nil` values and an empty resource are dropped.
  defp to_issue(%{} = entry) do
    %{
      "code" => entry["code"],
      "message" => entry["message"],
      "criticity" => entry["criticity"],
      "resource" =>
        %{
          "filename" => entry["resource.filename"],
          "line" => entry["resource.line"]
        }
        |> drop_empty_values()
    }
    |> drop_empty_values()
  end

  @doc """
  Removes the entries whose value is `nil` or an empty map.
  """
  def drop_empty_values(map), do: Map.filter(map, fn {_key, value} -> value != %{} and not is_nil(value) end)

  @doc """
  Lists the distinct values of `column`.
  """
  def get_values(%Explorer.DataFrame{} = df, column) do
    df
    |> DF.distinct([column])
    |> DF.to_rows()
    |> Enum.map(& &1[column])
  end

  @doc """
  Returns `{total_count, issues}`: the count of the `code` column for the
  whole dataframe and the issues of the page described by `pagination_config`.
  """
  def count_and_slice(%Explorer.DataFrame{} = df, pagination_config) do
    total_count = S.count(df["code"])

    issues =
      df
      |> slice(pagination_config)
      |> to_issues()

    {total_count, issues}
  end
end

apps/transport/lib/validators/netex/results_adapters/v0_1_0.ex

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ defmodule Transport.Validators.NeTEx.ResultsAdapters.V0_1_0 do
55

66
use Gettext, backend: TransportWeb.Gettext
77

8+
require Explorer.DataFrame, as: DF
9+
alias Transport.Validators.NeTEx.ResultsAdapters.Commons
10+
811
@behaviour Transport.Validators.NeTEx.ResultsAdapter
912

1013
@no_error "NoError"
@@ -171,12 +174,14 @@ defmodule Transport.Validators.NeTEx.ResultsAdapters.V0_1_0 do
171174
[]
172175
"""
173176
@impl Transport.Validators.NeTEx.ResultsAdapter
177+
# DEPRECATED
174178
def get_issues(%{} = validation_result, %{"issue_type" => issue_type}) do
175179
validation_result
176180
|> Map.get(issue_type, [])
177181
|> order_issues_by_location()
178182
end
179183

184+
# DEPRECATED
180185
def get_issues(%{} = validation_result, _) do
181186
validation_result
182187
|> Map.values()
@@ -185,8 +190,46 @@ defmodule Transport.Validators.NeTEx.ResultsAdapters.V0_1_0 do
185190
|> order_issues_by_location()
186191
end
187192

193+
# DEPRECATED
188194
def get_issues(_, _), do: []
189195

196+
# Paginated issues lookup backed by the serialised result.
#
# Accepts either the Parquet binary or the dataframe loaded from it, a filter
# map (optionally holding "issue_type") and a pagination config. Without an
# "issue_type" the first code found in the dataframe is used.
#
# Always returns `{total_entries, issues}`, `issues` being the requested page only.
def get_issues(binary, %{} = filter, %Scrivener.Config{} = pagination_config) when is_binary(binary) do
  binary
  |> Commons.from_binary()
  |> get_issues(filter, pagination_config)
end

def get_issues(
      %Explorer.DataFrame{} = df,
      %{"issue_type" => issue_type},
      %Scrivener.Config{} = pagination_config
    ) do
  df
  |> DF.filter(code == ^issue_type)
  |> order_issues_by_location()
  |> Commons.count_and_slice(pagination_config)
end

def get_issues(%Explorer.DataFrame{} = df, %{}, %Scrivener.Config{} = pagination_config) do
  case pick_default_issue_type(df) do
    # No code at all (e.g. empty dataframe): nothing to filter on.
    nil -> {0, []}
    issue_type -> get_issues(df, %{"issue_type" => issue_type}, pagination_config)
  end
end

# Fallback (e.g. no binary result stored yet): keep the `{total_entries, issues}`
# shape, since callers destructure the tuple to build the page.
def get_issues(_, _, _), do: {0, []}
220+
221+
@doc """
Picks the issue type used when none is requested: the first code listed by
`get_codes/1`, or `nil` when there is none.
"""
def pick_default_issue_type(%Explorer.DataFrame{} = df) do
  df
  |> get_codes()
  |> List.first()
end
224+
225+
@doc """
Lists the distinct values of the `code` column of the dataframe.
"""
def get_codes(%Explorer.DataFrame{} = df), do: Commons.get_values(df, "code")
226+
227+
# Sorts the issues by file name, then line, then message.
#
# `DF.sort_with/2` is the callback-based sorting API: the function receives the
# dataframe and returns the columns to sort on. `DF.sort_by/2` is a macro that
# expects a query expression instead of a function.
def order_issues_by_location(%Explorer.DataFrame{} = df) do
  DF.sort_with(df, &[&1["resource.filename"], &1["resource.line"], &1["message"]])
end
231+
232+
# DEPRECATED
190233
def order_issues_by_location(issues) do
191234
issues
192235
|> Enum.sort_by(fn issue ->
@@ -214,4 +257,18 @@ defmodule Transport.Validators.NeTEx.ResultsAdapters.V0_1_0 do
214257

215258
%{"summary" => summary, "stats" => stats, "issues" => issues, "max_severity" => max_severity}
216259
end
260+
261+
# Builds the dataframe of errors; this version adds no extra attribute to the
# common ones.
@impl Transport.Validators.NeTEx.ResultsAdapter
def to_dataframe(errors) do
  no_extra_attributes = fn _mandatory_attributes -> %{} end

  Commons.to_dataframe(errors, no_extra_attributes)
end
265+
266+
# Serialises a validation result: the values of the result map are flattened
# into a single list of errors, loaded in a dataframe and dumped to a binary.
@impl Transport.Validators.NeTEx.ResultsAdapter
def to_binary_result(result) do
  errors = result |> Map.values() |> List.flatten()

  errors
  |> to_dataframe()
  |> Commons.to_binary()
end
217274
end

0 commit comments

Comments
 (0)