Skip to content

Page de détails du dataset : améliorer la performance de la liste des resources historisées #4709

@vdegove

Description

@vdegove

En lien avec #4613 et #4502

Première PR avortée ici : #4703. En prod, le cache a enflé de manière démesurée et le site s’est mis à faire des erreurs 500.

Voici ce que la requête va chercher :

iex > dataset = DB.Dataset |> Ecto.Query.last |> DB.Repo.one
iex > Transport.History.Fetcher(dataset)

La requête SQL (les temps sont donnés avec un Dataset qui renvoie 6 resources history, donc très peu d’historisation) :

[debug] QUERY OK source="resource_history" db=33.3ms queue=1.0ms idle=1969.6ms
SELECT r0."id", r0."datagouv_id", r0."payload", r0."last_up_to_date_at", r0."inserted_at", r0."updated_at", r0."resource_id", r0."reuser_improved_data_id" FROM "resource_history" AS r0 LEFT OUTER JOIN "resource" AS r1 ON (r1."id" = r0."resource_id") AND (r1."dataset_id" = $1) WHERE (NOT (r1."id" IS NULL) OR r0."id" IN (SELECT sr0."id" FROM "resource_history" AS sr0 WHERE ((sr0."payload"->>'dataset_id')::bigint = $2))) ORDER BY r0."inserted_at" DESC [1291, 1291]
[debug] QUERY OK source="multi_validation" db=5.7ms queue=1.0ms idle=1009.0ms
SELECT DISTINCT ON (m0."resource_history_id") m0."id", m0."validation_timestamp", m0."validator", m0."validator_version", m0."command", m0."result", m0."data_vis", m0."max_error", m0."oban_args", m0."resource_id", m0."resource_history_id", m0."validated_data_name", m0."secondary_resource_id", m0."secondary_resource_history_id", m0."secondary_validated_data_name", m0."inserted_at", m0."updated_at", m0."resource_history_id" FROM "multi_validation" AS m0 WHERE (m0."resource_history_id" = ANY($1)) ORDER BY m0."resource_history_id", m0."resource_history_id", m0."inserted_at" DESC [[257545, 257682, 257737, 257848, 257858, 258262]]
[debug] QUERY OK source="resource_metadata" db=2.2ms queue=0.9ms idle=1018.1ms
SELECT r0."id", r0."resource_id", r0."resource_history_id", r0."multi_validation_id", r0."metadata", r0."modes", r0."features", r0."inserted_at", r0."updated_at", r0."multi_validation_id" FROM "resource_metadata" AS r0 WHERE (r0."multi_validation_id" = ANY($1)) [[421136, 420045, 420029, 419556, 419518, 419420]]

Un des éléments en sortie du tableau :

[
  %DB.ResourceHistory{
    __meta__: #Ecto.Schema.Metadata<:loaded, "resource_history">,
    id: 258262,
    datagouv_id: "d10fb9b2-1a5a-498c-ba55-e89cf0136ef1",
    payload: %{
      "conversion_GeoJSON_error" => "thread 'main' panicked at 'The GTFS file is not well formated.: CSVError { file_name: \"trips.txt\", source: Error(Deserialize { pos: Some(Position { byte: 7969, line: 103, record: 103 }), err: DeserializeError { field: None, kind: Message(\"unknown variant `2`, expected `0` or `1`\") } }), line_in_error: Some(LineError { headers: [\"trip_headsign\", \"trip_short_name\", \"shape_id\", \"direction_id\", \"block_id\", \"trip_code\", \"service_id\", \"route_id\", \"trip_id\"], values: [\"NIEPPE - GARE\", \"NAVETTE NIEPPE\", \"263\", \"2\", \"\", \"\", \"23089\", \"215\", \"23089\"] }) }', src/main.rs:42:10\nnote: run with `RUST_BACKTRACE=1` environment variable to display a backtrace\n",
      "conversion_GeoJSON_fatal_error" => true,
      "dataset_id" => 1291,
      "download_datetime" => "2025-06-26T08:58:50.198751Z",
      "filename" => "83202/83202.20250626.085850.198751.zip",
      "filenames" => ["agency.txt", "calendar.txt", "calendar_dates.txt",
       "routes.txt", "shapes.txt", "stop_times.txt", "stops.txt", "trips.txt"],
      "filesize" => 1016244,
      "format" => "GTFS",
      "http_headers" => %{
        "content-length" => "1017058",
        "content-type" => "application/zip",
        "etag" => "W/\"f84e2-197ab6fee20\"",
        "last-modified" => "Thu, 26 Jun 2025 08:52:08 GMT"
      },
      "latest_schema_version_to_date" => nil,
      "permanent_url" => "https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/83202/83202.20250626.085850.198751.zip",
      "resource_latest_url" => "https://www.data.gouv.fr/fr/datasets/r/d10fb9b2-1a5a-498c-ba55-e89cf0136ef1",
      "resource_url" => "https://gtfs-rt.infra-hubup.fr/hopbus/current/revision/gtfs",
      "schema_name" => nil,
      "schema_version" => nil,
      "title" => "Lien pour la mise à jour du GTFS du réseau Hop Bus",
      "total_compressed_size" => 1016244,
      "total_uncompressed_size" => 1016244,
      "uuid" => "73834d03-7210-4752-a035-e11615340783",
      "zip_metadata" => [
        %{
          "compressed_size" => 111,
          "file_name" => "agency.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "bfb0a352948b44b95860431078b55a1ce9424c93719181b8ee2648b316b3e688",
          "uncompressed_size" => 111
        },
        %{
          "compressed_size" => 6366,
          "file_name" => "calendar.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "0f50cda0bdc14e62cef003b9a1b86daf675533266965ebf080eec2404d99e068",
          "uncompressed_size" => 6366
        },
        %{
          "compressed_size" => 14592,
          "file_name" => "calendar_dates.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "bc007f1dbdd7743fe2bb1d50ece9c85dbbf9b5df233da6cf1e35f9225cbfef37",
          "uncompressed_size" => 14592
        },
        %{
          "compressed_size" => 677,
          "file_name" => "routes.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "9729a355e001f5b131d3b29c754f52466e05ef514be2e4759868bbaffdb95319",
          "uncompressed_size" => 677
        },
        %{
          "compressed_size" => 864967,
          "file_name" => "shapes.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "e1690ed035f7c02daebdf5b0a7e964c69ef90c38a9d4f354e7e26836f17d9509",
          "uncompressed_size" => 864967
        },
        %{
          "compressed_size" => 90670,
          "file_name" => "stop_times.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "36965dbfb191ebc80bb30b4380f0a67f968e6e44a16b226ac95d02ce2a8cdeec",
          "uncompressed_size" => 90670
        },
        %{
          "compressed_size" => 27383,
          "file_name" => "stops.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "ce83cd66fab88f5ec293b90e3f37a73e266d3e1ff38f60579a04ffcbfd954660",
          "uncompressed_size" => 27383
        },
        %{
          "compressed_size" => 11478,
          "file_name" => "trips.txt",
          "last_modified_datetime" => "2025-06-26T08:52:08",
          "sha256" => "3c559fdbc9ea4320b637635b8184b13c155591afa5991238cc3e288de801f124",
          "uncompressed_size" => 11478
        }
      ]
    },
    last_up_to_date_at: ~U[2025-06-27 00:11:44.769960Z],
    inserted_at: ~U[2025-06-26 08:58:50.493197Z],
    updated_at: ~U[2025-06-27 00:11:44.769989Z],
    resource_id: 83202,
    resource: #Ecto.Association.NotLoaded<association :resource is not loaded>,
    reuser_improved_data_id: nil,
    reuser_improved_data: #Ecto.Association.NotLoaded<association :reuser_improved_data is not loaded>,
    geo_data_import: #Ecto.Association.NotLoaded<association :geo_data_import is not loaded>,
    validations: [
      %DB.MultiValidation{
        __meta__: #Ecto.Schema.Metadata<:loaded, "multi_validation">,
        id: 421136,
        validation_timestamp: ~U[2025-06-26 08:58:58.570340Z],
        validator: "GTFS transport-validator",
        validator_version: "0.3.0",
        command: "https://validation.transport.data.gouv.fr/validate?url=https%3A%2F%2Ftransport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com%2F83202%2F83202.20250626.085850.198751.zip",
        result: %{
          "InvalidReference" => [
            %{
              "details" => "The stop is referenced as a stop's parent_station but does not exist",
              "issue_type" => "InvalidReference",
              "object_id" => "0",
              "object_type" => "Stop",
              "related_objects" => [
                %{
                  "id" => "210975",
                  "name" => "HAZEBROUCK - SACRÉ COEUR",
                  "object_type" => "Stop"
                }
              ],
              "severity" => "Fatal"
            },
            %{
              "details" => "The stop is referenced as a stop's parent_station but does not exist",
              "issue_type" => "InvalidReference",
              "object_id" => "1",
              "object_type" => "Stop",
              "related_objects" => [
                %{
                  "id" => "7385",
                  "name" => "HAZEBROUCK -  CHATEAU DE L'ORME",
                  "object_type" => "Stop"
                }
              ],
              "severity" => "Fatal"
            }
          ],
          "UnloadableModel" => [
            %{
              "details" => "impossible to read csv file 'trips.txt': CSV deserialize error: record 103 (line: 103, byte: 7969): unknown variant `2`, expected `0` or `1`",
              "issue_type" => "UnloadableModel",
              "object_id" => "A fatal error has occured while loading the model, many rules have not been checked",
              "related_file" => %{
                "file_name" => "trips.txt",
                "line" => %{
                  "headers" => ["trip_headsign", "trip_short_name", "shape_id",
                   "direction_id", "block_id", "trip_code", "service_id",
                   "route_id", "trip_id"],
                  "line_number" => 103,
                  "values" => ["NIEPPE - GARE", "NAVETTE NIEPPE", "263", "2",
                   "", "", "23089", "215", "23089"]
                }
              },
              "related_objects" => [],
              "severity" => "Fatal"
            }
          ]
        },
        data_vis: %{
          "InvalidReference" => %{
            "geojson" => %{"features" => [], "type" => "FeatureCollection"},
            "severity" => "Fatal"
          },
          "UnloadableModel" => %{
            "geojson" => %{"features" => [], "type" => "FeatureCollection"},
            "severity" => "Fatal"
          }
        },
        max_error: "Fatal",
        oban_args: nil,
        resource_id: nil,
        resource: #Ecto.Association.NotLoaded<association :resource is not loaded>,
        resource_history_id: 258262,
        resource_history: #Ecto.Association.NotLoaded<association :resource_history is not loaded>,
        validated_data_name: nil,
        secondary_resource_id: nil,
        secondary_resource: #Ecto.Association.NotLoaded<association :secondary_resource is not loaded>,
        secondary_resource_history_id: nil,
        secondary_resource_history: #Ecto.Association.NotLoaded<association :secondary_resource_history is not loaded>,
        secondary_validated_data_name: nil,
        metadata: %DB.ResourceMetadata{
          __meta__: #Ecto.Schema.Metadata<:loaded, "resource_metadata">,
          id: 2380474,
          resource_id: nil,
          resource: #Ecto.Association.NotLoaded<association :resource is not loaded>,
          resource_history_id: 258262,
          resource_history: #Ecto.Association.NotLoaded<association :resource_history is not loaded>,
          multi_validation_id: 421136,
          multi_validation: #Ecto.Association.NotLoaded<association :multi_validation is not loaded>,
          metadata: %{
            "end_date" => "2026-07-06",
            "feed_contact_emails" => %{},
            "feed_end_dates" => %{},
            "feed_start_dates" => %{},
            "has_fares" => false,
            ...
          },
          modes: ["bus"],
          features: ["position des stations", "horaires théoriques",
           "topologie du réseau", ...],
          inserted_at: ~U[2025-06-26 08:58:58.665820Z],
          updated_at: ~U[2025-06-26 08:58:58.665820Z]
        },
        inserted_at: ~U[2025-06-26 08:58:58.664532Z],
        updated_at: ~U[2025-06-26 08:58:58.664532Z]
      }
    ],
    metadata: #Ecto.Association.NotLoaded<association :metadata is not loaded>
  },
…
]

Metadata

Metadata

Labels

dette technique : Entretien & maintenance générale, nécessaire pour que le code reste de bonne qualité

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions