# Tracking the number of IRVE charge points (faster version)

```elixir
Mix.install([
{:req, "~> 0.5.7"},
{:nimble_csv, "~> 1.2"},
{:kino, "~> 0.14.2"},
{:explorer, "~> 0.10.0"},
{:kino_vega_lite, "~> 0.1.11"},
{:kino_explorer, "~> 0.1.20"}
])
```

## Analysis

I start from the [Fichier consolidé des bornes de recharge pour véhicules électriques](https://transport.data.gouv.fr/datasets/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques) resource (the consolidated file of electric vehicle charging stations).

I grab the dataset identifier (`118`) and rely on the URL that returns its resource history.

Resource used for filtering:

* https://transport.data.gouv.fr/resources/81623

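For orientation, once the header row is zipped onto a data row (see `Stats.build_list_of_maps/2` below), each history entry becomes a map along these lines. The field names come from the code below; the values here are made up:

```elixir
# Purely illustrative entry: the real history CSV has more columns,
# and these values are invented.
%{
  "inserted_at" => "2024-06-01T03:05:00Z",
  "permanent_url" => "https://example.com/archived/irve.csv",
  "payload" => "{...}"
}
```
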
```elixir
Code.require_file(__DIR__ <> "/../apps/shared/lib/req_custom_cache.ex")

require Explorer.DataFrame

defmodule HTTPQuery do
  # Plain GET, asserting a 200 response.
  def get!(url) do
    %{status: 200, body: data} = Req.get!(url)
    data
  end

  def cache_dir, do: Path.join(__DIR__, "../cache-dir")

  # Same GET, but backed by a local file cache (see req_custom_cache.ex) so
  # re-running the notebook does not re-download each archived CSV.
  def cached_get!(url) do
    req = Req.new() |> Transport.Shared.ReqCustomCache.attach()
    # avoid decoding, for much faster processing
    Req.get!(req, url: url, receive_timeout: 100_000, custom_cache_dir: cache_dir(), decode_body: false)
  end
end

defmodule Stats do
  def get_versions_data(headers, rows) do
    rows
    |> build_list_of_maps(headers)
    |> remove_json_rows()
    |> prepare_date_field()
    |> pick_first_row_by_month()
  end

  # Zip the CSV header names onto each row, giving one map per history entry.
  def build_list_of_maps(rows, headers) do
    Enum.map(rows, fn row -> headers |> Enum.zip(row) |> Map.new() end)
  end

  # The history also references JSON artifacts; keep only the CSV ones.
  def remove_json_rows(rows) do
    Enum.reject(rows, fn row -> row["permanent_url"] =~ ~r/\.json$/ end)
  end

  # "inserted_at" holds an ISO8601 timestamp; keep only its date part, as a `Date`.
  def prepare_date_field(rows) do
    Enum.map(rows, fn row ->
      Map.update!(row, "inserted_at", fn timestamp ->
        timestamp |> String.slice(0..9) |> Date.from_iso8601!()
      end)
    end)
  end

  # Group the entries by "YYYY-MM" and keep the earliest one of each month,
  # so that we download one file per month rather than one per day.
  def pick_first_row_by_month(rows) do
    rows
    |> Enum.group_by(fn row -> row |> Map.fetch!("inserted_at") |> to_string() |> String.slice(0..6) end)
    |> Enum.map(fn {_month, entries} -> Enum.min_by(entries, & &1["inserted_at"], Date) end)
  end
end

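# With nimble_csv installed, Req decodes the text/csv response into a list
# of lists; the first element is the header row.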
history_url = "https://transport.data.gouv.fr/datasets/118/resources_history_csv"
[headers | rows] = HTTPQuery.get!(history_url)
data = Stats.get_versions_data(headers, rows)

data =
  data
  |> Task.async_stream(
    fn row ->
      try do
        %{status: 200, body: body} = row["permanent_url"] |> HTTPQuery.cached_get!()
        # The body is raw text (decode_body: false); split it manually and
        # drop the empty trailing line left by the final newline.
        [headers | rows] = body |> String.split("\n")
        headers = headers |> String.split(",")
        rows = Enum.reject(rows, &(&1 == ""))
        # Assert the expected column is present, then count one charge point per
        # data line: naive but fast, since it skips full CSV parsing.
        {true, pdc_count} = {"id_pdc_itinerance" in headers, length(rows)}

        row
        |> Map.put("pdc_count", pdc_count)
        |> Map.drop(["payload", "permanent_url"])
      rescue
        # Some archived files are malformed or miss the expected column: skip them.
        _error -> nil
      end
    end,
    timeout: 100_000,
    max_concurrency: 10
  )
  |> Enum.map(fn {:ok, row} -> row end)
  |> Enum.reject(&is_nil(&1))

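# Return :ok so the notebook does not dump the whole list as the cell result.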
:ok

```
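
To sanity-check the month-bucketing step, `Stats.pick_first_row_by_month/1` can be run on a few hand-made rows (illustrative dates, not real history entries):

```elixir
# Two entries in January, one in February (made-up data).
sample = [
  %{"inserted_at" => ~D[2024-01-20]},
  %{"inserted_at" => ~D[2024-01-05]},
  %{"inserted_at" => ~D[2024-02-03]}
]

# Expected result: the 2024-01-05 and 2024-02-03 rows, i.e. the earliest
# entry of each month.
Stats.pick_first_row_by_month(sample)
```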

```elixir
data
|> Enum.map(&Map.take(&1, ["inserted_at", "pdc_count"]))
|> Enum.sort_by(& &1["inserted_at"], {:desc, Date})
|> Kino.DataTable.new()
```
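
Explorer is already part of the setup; as an optional sketch, the same two columns can be loaded into an `Explorer.DataFrame` (rendered by `kino_explorer`) for further slicing:

```elixir
# Optional: same data as an Explorer DataFrame. A list of row maps is
# accepted through the Table.Reader protocol.
data
|> Enum.map(&Map.take(&1, ["inserted_at", "pdc_count"]))
|> Explorer.DataFrame.new()
```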

```elixir
VegaLite.new(width: 750, height: 500)
|> VegaLite.data_from_values(data, only: ["inserted_at", "pdc_count"])
|> VegaLite.mark(:area)
|> VegaLite.encode_field(:x, "inserted_at", type: :temporal, time_unit: "yearmonth", axis: [format: "%Y-%m", label_angle: -45])
|> VegaLite.encode_field(:y, "pdc_count", type: :quantitative)
```