-
Notifications
You must be signed in to change notification settings - Fork 32
Closed
Closed
Copy link
Labels
dette technique — Entretien & maintenance générale, nécessaire pour que le code reste de bonne qualité
Description
Première PR avortée ici : #4703. En prod, le cache a enflé de manière démesurée et le site s’est mis à renvoyer des erreurs 500.
Voici ce que la requête va chercher :
iex > dataset = DB.Dataset |> Ecto.Query.last |> DB.Repo.one
iex > Transport.History.Fetcher(dataset)
La requête SQL (les temps sont donnés avec un Dataset qui renvoie 6 resource history, donc très peu d’historisation) :
[debug] QUERY OK source="resource_history" db=33.3ms queue=1.0ms idle=1969.6ms
SELECT r0."id", r0."datagouv_id", r0."payload", r0."last_up_to_date_at", r0."inserted_at", r0."updated_at", r0."resource_id", r0."reuser_improved_data_id" FROM "resource_history" AS r0 LEFT OUTER JOIN "resource" AS r1 ON (r1."id" = r0."resource_id") AND (r1."dataset_id" = $1) WHERE (NOT (r1."id" IS NULL) OR r0."id" IN (SELECT sr0."id" FROM "resource_history" AS sr0 WHERE ((sr0."payload"->>'dataset_id')::bigint = $2))) ORDER BY r0."inserted_at" DESC [1291, 1291]
[debug] QUERY OK source="multi_validation" db=5.7ms queue=1.0ms idle=1009.0ms
SELECT DISTINCT ON (m0."resource_history_id") m0."id", m0."validation_timestamp", m0."validator", m0."validator_version", m0."command", m0."result", m0."data_vis", m0."max_error", m0."oban_args", m0."resource_id", m0."resource_history_id", m0."validated_data_name", m0."secondary_resource_id", m0."secondary_resource_history_id", m0."secondary_validated_data_name", m0."inserted_at", m0."updated_at", m0."resource_history_id" FROM "multi_validation" AS m0 WHERE (m0."resource_history_id" = ANY($1)) ORDER BY m0."resource_history_id", m0."resource_history_id", m0."inserted_at" DESC [[257545, 257682, 257737, 257848, 257858, 258262]]
[debug] QUERY OK source="resource_metadata" db=2.2ms queue=0.9ms idle=1018.1ms
SELECT r0."id", r0."resource_id", r0."resource_history_id", r0."multi_validation_id", r0."metadata", r0."modes", r0."features", r0."inserted_at", r0."updated_at", r0."multi_validation_id" FROM "resource_metadata" AS r0 WHERE (r0."multi_validation_id" = ANY($1)) [[421136, 420045, 420029, 419556, 419518, 419420]]
Un des éléments en sortie du tableau :
[
%DB.ResourceHistory{
__meta__: #Ecto.Schema.Metadata<:loaded, "resource_history">,
id: 258262,
datagouv_id: "d10fb9b2-1a5a-498c-ba55-e89cf0136ef1",
payload: %{
"conversion_GeoJSON_error" => "thread 'main' panicked at 'The GTFS file is not well formated.: CSVError { file_name: \"trips.txt\", source: Error(Deserialize { pos: Some(Position { byte: 7969, line: 103, record: 103 }), err: DeserializeError { field: None, kind: Message(\"unknown variant `2`, expected `0` or `1`\") } }), line_in_error: Some(LineError { headers: [\"trip_headsign\", \"trip_short_name\", \"shape_id\", \"direction_id\", \"block_id\", \"trip_code\", \"service_id\", \"route_id\", \"trip_id\"], values: [\"NIEPPE - GARE\", \"NAVETTE NIEPPE\", \"263\", \"2\", \"\", \"\", \"23089\", \"215\", \"23089\"] }) }', src/main.rs:42:10\nnote: run with `RUST_BACKTRACE=1` environment variable to display a backtrace\n",
"conversion_GeoJSON_fatal_error" => true,
"dataset_id" => 1291,
"download_datetime" => "2025-06-26T08:58:50.198751Z",
"filename" => "83202/83202.20250626.085850.198751.zip",
"filenames" => ["agency.txt", "calendar.txt", "calendar_dates.txt",
"routes.txt", "shapes.txt", "stop_times.txt", "stops.txt", "trips.txt"],
"filesize" => 1016244,
"format" => "GTFS",
"http_headers" => %{
"content-length" => "1017058",
"content-type" => "application/zip",
"etag" => "W/\"f84e2-197ab6fee20\"",
"last-modified" => "Thu, 26 Jun 2025 08:52:08 GMT"
},
"latest_schema_version_to_date" => nil,
"permanent_url" => "https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/83202/83202.20250626.085850.198751.zip",
"resource_latest_url" => "https://www.data.gouv.fr/fr/datasets/r/d10fb9b2-1a5a-498c-ba55-e89cf0136ef1",
"resource_url" => "https://gtfs-rt.infra-hubup.fr/hopbus/current/revision/gtfs",
"schema_name" => nil,
"schema_version" => nil,
"title" => "Lien pour la mise à jour du GTFS du réseau Hop Bus",
"total_compressed_size" => 1016244,
"total_uncompressed_size" => 1016244,
"uuid" => "73834d03-7210-4752-a035-e11615340783",
"zip_metadata" => [
%{
"compressed_size" => 111,
"file_name" => "agency.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "bfb0a352948b44b95860431078b55a1ce9424c93719181b8ee2648b316b3e688",
"uncompressed_size" => 111
},
%{
"compressed_size" => 6366,
"file_name" => "calendar.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "0f50cda0bdc14e62cef003b9a1b86daf675533266965ebf080eec2404d99e068",
"uncompressed_size" => 6366
},
%{
"compressed_size" => 14592,
"file_name" => "calendar_dates.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "bc007f1dbdd7743fe2bb1d50ece9c85dbbf9b5df233da6cf1e35f9225cbfef37",
"uncompressed_size" => 14592
},
%{
"compressed_size" => 677,
"file_name" => "routes.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "9729a355e001f5b131d3b29c754f52466e05ef514be2e4759868bbaffdb95319",
"uncompressed_size" => 677
},
%{
"compressed_size" => 864967,
"file_name" => "shapes.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "e1690ed035f7c02daebdf5b0a7e964c69ef90c38a9d4f354e7e26836f17d9509",
"uncompressed_size" => 864967
},
%{
"compressed_size" => 90670,
"file_name" => "stop_times.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "36965dbfb191ebc80bb30b4380f0a67f968e6e44a16b226ac95d02ce2a8cdeec",
"uncompressed_size" => 90670
},
%{
"compressed_size" => 27383,
"file_name" => "stops.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "ce83cd66fab88f5ec293b90e3f37a73e266d3e1ff38f60579a04ffcbfd954660",
"uncompressed_size" => 27383
},
%{
"compressed_size" => 11478,
"file_name" => "trips.txt",
"last_modified_datetime" => "2025-06-26T08:52:08",
"sha256" => "3c559fdbc9ea4320b637635b8184b13c155591afa5991238cc3e288de801f124",
"uncompressed_size" => 11478
}
]
},
last_up_to_date_at: ~U[2025-06-27 00:11:44.769960Z],
inserted_at: ~U[2025-06-26 08:58:50.493197Z],
updated_at: ~U[2025-06-27 00:11:44.769989Z],
resource_id: 83202,
resource: #Ecto.Association.NotLoaded<association :resource is not loaded>,
reuser_improved_data_id: nil,
reuser_improved_data: #Ecto.Association.NotLoaded<association :reuser_improved_data is not loaded>,
geo_data_import: #Ecto.Association.NotLoaded<association :geo_data_import is not loaded>,
validations: [
%DB.MultiValidation{
__meta__: #Ecto.Schema.Metadata<:loaded, "multi_validation">,
id: 421136,
validation_timestamp: ~U[2025-06-26 08:58:58.570340Z],
validator: "GTFS transport-validator",
validator_version: "0.3.0",
command: "https://validation.transport.data.gouv.fr/validate?url=https%3A%2F%2Ftransport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com%2F83202%2F83202.20250626.085850.198751.zip",
result: %{
"InvalidReference" => [
%{
"details" => "The stop is referenced as a stop's parent_station but does not exist",
"issue_type" => "InvalidReference",
"object_id" => "0",
"object_type" => "Stop",
"related_objects" => [
%{
"id" => "210975",
"name" => "HAZEBROUCK - SACRÉ COEUR",
"object_type" => "Stop"
}
],
"severity" => "Fatal"
},
%{
"details" => "The stop is referenced as a stop's parent_station but does not exist",
"issue_type" => "InvalidReference",
"object_id" => "1",
"object_type" => "Stop",
"related_objects" => [
%{
"id" => "7385",
"name" => "HAZEBROUCK - CHATEAU DE L'ORME",
"object_type" => "Stop"
}
],
"severity" => "Fatal"
}
],
"UnloadableModel" => [
%{
"details" => "impossible to read csv file 'trips.txt': CSV deserialize error: record 103 (line: 103, byte: 7969): unknown variant `2`, expected `0` or `1`",
"issue_type" => "UnloadableModel",
"object_id" => "A fatal error has occured while loading the model, many rules have not been checked",
"related_file" => %{
"file_name" => "trips.txt",
"line" => %{
"headers" => ["trip_headsign", "trip_short_name", "shape_id",
"direction_id", "block_id", "trip_code", "service_id",
"route_id", "trip_id"],
"line_number" => 103,
"values" => ["NIEPPE - GARE", "NAVETTE NIEPPE", "263", "2",
"", "", "23089", "215", "23089"]
}
},
"related_objects" => [],
"severity" => "Fatal"
}
]
},
data_vis: %{
"InvalidReference" => %{
"geojson" => %{"features" => [], "type" => "FeatureCollection"},
"severity" => "Fatal"
},
"UnloadableModel" => %{
"geojson" => %{"features" => [], "type" => "FeatureCollection"},
"severity" => "Fatal"
}
},
max_error: "Fatal",
oban_args: nil,
resource_id: nil,
resource: #Ecto.Association.NotLoaded<association :resource is not loaded>,
resource_history_id: 258262,
resource_history: #Ecto.Association.NotLoaded<association :resource_history is not loaded>,
validated_data_name: nil,
secondary_resource_id: nil,
secondary_resource: #Ecto.Association.NotLoaded<association :secondary_resource is not loaded>,
secondary_resource_history_id: nil,
secondary_resource_history: #Ecto.Association.NotLoaded<association :secondary_resource_history is not loaded>,
secondary_validated_data_name: nil,
metadata: %DB.ResourceMetadata{
__meta__: #Ecto.Schema.Metadata<:loaded, "resource_metadata">,
id: 2380474,
resource_id: nil,
resource: #Ecto.Association.NotLoaded<association :resource is not loaded>,
resource_history_id: 258262,
resource_history: #Ecto.Association.NotLoaded<association :resource_history is not loaded>,
multi_validation_id: 421136,
multi_validation: #Ecto.Association.NotLoaded<association :multi_validation is not loaded>,
metadata: %{
"end_date" => "2026-07-06",
"feed_contact_emails" => %{},
"feed_end_dates" => %{},
"feed_start_dates" => %{},
"has_fares" => false,
...
},
modes: ["bus"],
features: ["position des stations", "horaires théoriques",
"topologie du réseau", ...],
inserted_at: ~U[2025-06-26 08:58:58.665820Z],
updated_at: ~U[2025-06-26 08:58:58.665820Z]
},
inserted_at: ~U[2025-06-26 08:58:58.664532Z],
updated_at: ~U[2025-06-26 08:58:58.664532Z]
}
],
metadata: #Ecto.Association.NotLoaded<association :metadata is not loaded>
},
…
]
Metadata
Metadata
Assignees
Labels
dette technique — Entretien & maintenance générale, nécessaire pour que le code reste de bonne qualité