
Commit 46764a5

feat: support load_table and list_rows with picosecond timestamp (#2351)

1 parent 8d5785a commit 46764a5
File tree: 14 files changed, +328 −10 lines

google/cloud/bigquery/_helpers.py

Lines changed: 8 additions & 2 deletions

@@ -32,6 +32,8 @@
 from google.cloud._helpers import _RFC3339_MICROS
 from google.cloud._helpers import _RFC3339_NO_FRACTION
 from google.cloud._helpers import _to_bytes
+from google.cloud.bigquery import enums
+
 from google.auth import credentials as ga_credentials  # type: ignore
 from google.api_core import client_options as client_options_lib

@@ -252,11 +254,15 @@ def bytes_to_py(self, value, field):
         if _not_null(value, field):
             return base64.standard_b64decode(_to_bytes(value))

-    def timestamp_to_py(self, value, field):
-        """Coerce 'value' to a datetime, if set or not nullable."""
+    def timestamp_to_py(self, value, field) -> Union[datetime.datetime, str, None]:
+        """Coerce 'value' to a datetime, if set or not nullable. If timestamp
+        is of picosecond precision, preserve the string format."""
+        if field.timestamp_precision == enums.TimestampPrecision.PICOSECOND:
+            return value
         if _not_null(value, field):
             # value will be a integer in seconds, to microsecond precision, in UTC.
             return _datetime_from_microseconds(int(value))
+        return None

     def datetime_to_py(self, value, field):
         """Coerce 'value' to a datetime, if set or not nullable.

google/cloud/bigquery/_job_helpers.py

Lines changed: 16 additions & 4 deletions

@@ -49,6 +49,7 @@
 import google.api_core.exceptions as core_exceptions
 from google.api_core import retry as retries

+from google.cloud.bigquery import enums
 from google.cloud.bigquery import job
 import google.cloud.bigquery.job.query
 import google.cloud.bigquery.query

@@ -265,6 +266,7 @@ def _to_query_request(
     query: str,
     location: Optional[str] = None,
     timeout: Optional[float] = None,
+    timestamp_precision: Optional[enums.TimestampPrecision] = None,
 ) -> Dict[str, Any]:
     """Transform from Job resource to QueryRequest resource.

@@ -285,10 +287,15 @@
     # Default to standard SQL.
     request_body.setdefault("useLegacySql", False)

-    # Since jobs.query can return results, ensure we use the lossless timestamp
-    # format. See: https://github.com/googleapis/python-bigquery/issues/395
     request_body.setdefault("formatOptions", {})
-    request_body["formatOptions"]["useInt64Timestamp"] = True  # type: ignore
+
+    # Cannot specify both use_int64_timestamp and timestamp_output_format.
+    if timestamp_precision == enums.TimestampPrecision.PICOSECOND:
+        request_body["formatOptions"]["timestampOutputFormat"] = "ISO8601_STRING"  # type: ignore
+    else:
+        # Since jobs.query can return results, ensure we use the lossless
+        # timestamp format. See: https://github.com/googleapis/python-bigquery/issues/395
+        request_body["formatOptions"]["useInt64Timestamp"] = True  # type: ignore

     if timeout is not None:
         # Subtract a buffer for context switching, network latency, etc.

@@ -370,14 +377,19 @@ def query_jobs_query(
     retry: retries.Retry,
     timeout: Optional[float],
     job_retry: Optional[retries.Retry],
+    timestamp_precision: Optional[enums.TimestampPrecision] = None,
 ) -> job.QueryJob:
     """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED.

     See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
     """
     path = _to_query_path(project)
     request_body = _to_query_request(
-        query=query, job_config=job_config, location=location, timeout=timeout
+        query=query,
+        job_config=job_config,
+        location=location,
+        timeout=timeout,
+        timestamp_precision=timestamp_precision,
     )

     def do_query():
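The branch only changes the formatOptions member of the QueryRequest body. A sketch of the two payload shapes _to_query_request produces (query text is a placeholder):

    # timestamp_precision=PICOSECOND: ask the API for ISO 8601 strings so no
    # fractional digits are lost.
    pico_request = {
        "query": "SELECT ts FROM dataset.table",  # placeholder
        "useLegacySql": False,
        "formatOptions": {"timestampOutputFormat": "ISO8601_STRING"},
    }

    # Default: lossless int64 microsecond timestamps, per issue #395.
    default_request = {
        "query": "SELECT ts FROM dataset.table",  # placeholder
        "useLegacySql": False,
        "formatOptions": {"useInt64Timestamp": True},
    }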

google/cloud/bigquery/client.py

Lines changed: 30 additions & 1 deletion

@@ -3469,6 +3469,8 @@ def query(
         timeout: TimeoutType = DEFAULT_TIMEOUT,
         job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY,
         api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT,
+        *,
+        timestamp_precision: Optional[enums.TimestampPrecision] = None,
     ) -> job.QueryJob:
         """Run a SQL query.

@@ -3524,6 +3526,11 @@

                 See :class:`google.cloud.bigquery.enums.QueryApiMethod` for
                 details on the difference between the query start methods.
+            timestamp_precision (Optional[enums.TimestampPrecision]):
+                [Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`,
+                timestamp columns of picosecond precision will be returned with
+                full precision. Otherwise, will truncate to microsecond
+                precision. Only applies when api_method == `enums.QueryApiMethod.QUERY`.

         Returns:
             google.cloud.bigquery.job.QueryJob: A new query job instance.

@@ -3543,6 +3550,15 @@
                 "`job_id` was provided, but the 'QUERY' `api_method` was requested."
             )

+        if (
+            timestamp_precision == enums.TimestampPrecision.PICOSECOND
+            and api_method != enums.QueryApiMethod.QUERY
+        ):
+            raise ValueError(
+                "Picosecond Timestamp is only supported when `api_method "
+                "== enums.QueryApiMethod.QUERY`."
+            )
+
         if project is None:
             project = self.project

@@ -3568,6 +3584,7 @@
                 retry,
                 timeout,
                 job_retry,
+                timestamp_precision=timestamp_precision,
             )
         elif api_method == enums.QueryApiMethod.INSERT:
             return _job_helpers.query_jobs_insert(

@@ -4062,6 +4079,8 @@ def list_rows(
         page_size: Optional[int] = None,
         retry: retries.Retry = DEFAULT_RETRY,
         timeout: TimeoutType = DEFAULT_TIMEOUT,
+        *,
+        timestamp_precision: Optional[enums.TimestampPrecision] = None,
     ) -> RowIterator:
         """List the rows of the table.

@@ -4110,6 +4129,11 @@
                 before using ``retry``.
                 If multiple requests are made under the hood, ``timeout``
                 applies to each individual request.
+            timestamp_precision (Optional[enums.TimestampPrecision]):
+                [Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`,
+                timestamp columns of picosecond precision will be returned with
+                full precision. Otherwise, will truncate to microsecond
+                precision.

         Returns:
             google.cloud.bigquery.table.RowIterator:

@@ -4143,7 +4167,12 @@
         if start_index is not None:
             params["startIndex"] = start_index

-        params["formatOptions.useInt64Timestamp"] = True
+        # Cannot specify both use_int64_timestamp and timestamp_output_format.
+        if timestamp_precision == enums.TimestampPrecision.PICOSECOND:
+            params["formatOptions.timestampOutputFormat"] = "ISO8601_STRING"
+        else:
+            params["formatOptions.useInt64Timestamp"] = True
+
         row_iterator = RowIterator(
             client=self,
             api_request=functools.partial(self._call_api, retry, timeout=timeout),
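Taken together, the two public entry points would be used roughly like this (a sketch, grounded in the system tests later in this commit; the table ID is a placeholder for a table with a TIMESTAMP(12) column):

    from google.cloud import bigquery
    from google.cloud.bigquery import enums

    client = bigquery.Client()

    # Picosecond output is only supported on the jobs.query path, so
    # api_method must be QUERY; otherwise client.query raises ValueError.
    job = client.query(
        "SELECT CAST('2025-10-20' AS TIMESTAMP(12))",
        api_method=enums.QueryApiMethod.QUERY,
        timestamp_precision=enums.TimestampPrecision.PICOSECOND,
    )
    for row in job.result():
        print(row[0])  # ISO 8601 string, e.g. "2025-10-20T00:00:00.000000000000Z"

    # list_rows takes the same keyword-only argument.
    rows = client.list_rows(
        "my-project.my_dataset.pico_table",  # placeholder table ID
        timestamp_precision=enums.TimestampPrecision.PICOSECOND,
    )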

google/cloud/bigquery/job/load.py

Lines changed: 30 additions & 0 deletions

@@ -759,6 +759,36 @@ def column_name_character_map(self, value: Optional[str]):
             value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
         self._set_sub_prop("columnNameCharacterMap", value)

+    @property
+    def timestamp_target_precision(self) -> Optional[List[int]]:
+        """Optional[list[int]]: [Private Preview] Precisions (maximum number of
+        total digits in base 10) for seconds of TIMESTAMP types that are
+        allowed to the destination table for autodetection mode.
+
+        Available for the formats: CSV.
+
+        For the CSV format, possible values include:
+        None, [], or [6]: timestamp(6) for all auto detected TIMESTAMP
+        columns.
+        [6, 12]: timestamp(6) for all auto detected TIMESTAMP columns that
+        have less than 6 digits of subseconds. timestamp(12) for all auto
+        detected TIMESTAMP columns that have more than 6 digits of
+        subseconds.
+        [12]: timestamp(12) for all auto detected TIMESTAMP columns.
+
+        The order of the elements in this array is ignored. Inputs that have
+        higher precision than the highest target precision in this array will
+        be truncated.
+        """
+        return self._get_sub_prop("timestampTargetPrecision")
+
+    @timestamp_target_precision.setter
+    def timestamp_target_precision(self, value: Optional[List[int]]):
+        if value is not None:
+            self._set_sub_prop("timestampTargetPrecision", value)
+        else:
+            self._del_sub_prop("timestampTargetPrecision")

 class LoadJob(_AsyncJob):
     """Asynchronous job for loading data into a table.

tests/data/pico.csv

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+2025-01-01T00:00:00.123456789012Z
+2025-01-02T00:00:00.123456789012Z
+2025-01-03T00:00:00.123456789012Z

tests/data/pico_schema.json

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+[
+  {
+    "name": "pico_col",
+    "type": "TIMESTAMP",
+    "mode": "NULLABLE",
+    "timestampPrecision": "12"
+  }
+]
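timestampPrecision is the schema-level switch; timestamp_to_py in _helpers.py above checks it through the field's timestamp_precision attribute. A sketch of reading this file back (assuming schema_from_json surfaces the new field, as the conftest fixture below relies on):

    from google.cloud import bigquery

    client = bigquery.Client()
    schema = client.schema_from_json("tests/data/pico_schema.json")
    field = schema[0]
    print(field.name, field.field_type, field.mode)  # pico_col TIMESTAMP NULLABLE
    # The attribute the row parser compares against TimestampPrecision.PICOSECOND:
    print(field.timestamp_precision)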

tests/system/conftest.py

Lines changed: 19 additions & 0 deletions

@@ -98,12 +98,14 @@ def load_scalars_table(
     data_path: str = "scalars.jsonl",
     source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON,
     schema_source="scalars_schema.json",
+    timestamp_target_precision=None,
 ) -> str:
     schema = bigquery_client.schema_from_json(DATA_DIR / schema_source)
     table_id = data_path.replace(".", "_") + hex(random.randrange(1000000))
     job_config = bigquery.LoadJobConfig()
     job_config.schema = schema
     job_config.source_format = source_format
+    job_config.timestamp_target_precision = timestamp_target_precision
     full_table_id = f"{project_id}.{dataset_id}.{table_id}"
     with open(DATA_DIR / data_path, "rb") as data_file:
         job = bigquery_client.load_table_from_file(

@@ -169,6 +171,23 @@ def scalars_table_csv(
     bigquery_client.delete_table(full_table_id, not_found_ok=True)


+@pytest.fixture(scope="session")
+def scalars_table_pico(
+    bigquery_client: bigquery.Client, project_id: str, dataset_id: str
+):
+    full_table_id = load_scalars_table(
+        bigquery_client,
+        project_id,
+        dataset_id,
+        data_path="pico.csv",
+        source_format=enums.SourceFormat.CSV,
+        schema_source="pico_schema.json",
+        timestamp_target_precision=[12],
+    )
+    yield full_table_id
+    bigquery_client.delete_table(full_table_id, not_found_ok=True)
+
+
 @pytest.fixture
 def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub):
     return replace_non_anum("_", request.node.name)

tests/system/test_client.py

Lines changed: 23 additions & 0 deletions

@@ -1295,6 +1295,29 @@ def test_load_table_from_json_schema_autodetect_table_exists(self):
         self.assertEqual(tuple(table.schema), table_schema)
         self.assertEqual(table.num_rows, 2)

+    def test_load_table_from_csv_w_picosecond_timestamp(self):
+        dataset_id = _make_dataset_id("bq_system_test")
+        self.temp_dataset(dataset_id)
+        table_id = "{}.{}.load_table_from_json_basic_use".format(
+            Config.CLIENT.project, dataset_id
+        )
+
+        table_schema = Config.CLIENT.schema_from_json(DATA_PATH / "pico_schema.json")
+        # create the table before loading so that the column order is predictable
+        table = helpers.retry_403(Config.CLIENT.create_table)(
+            Table(table_id, schema=table_schema)
+        )
+        self.to_delete.insert(0, table)
+
+        # do not pass an explicit job config to trigger automatic schema detection
+        with open(DATA_PATH / "pico.csv", "rb") as f:
+            load_job = Config.CLIENT.load_table_from_file(f, table_id)
+        load_job.result()
+
+        table = Config.CLIENT.get_table(table)
+        self.assertEqual(list(table.schema), table_schema)
+        self.assertEqual(table.num_rows, 3)
+
     def test_load_avro_from_uri_then_dump_table(self):
         from google.cloud.bigquery.job import CreateDisposition
         from google.cloud.bigquery.job import SourceFormat

tests/system/test_list_rows.py

Lines changed: 20 additions & 0 deletions

@@ -132,3 +132,23 @@ def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: str):

     row_null = rows[1]
     assert row_null["range_date"] is None
+
+
+def test_list_rows_pico(bigquery_client: bigquery.Client, scalars_table_pico: str):
+    rows = bigquery_client.list_rows(
+        scalars_table_pico, timestamp_precision=enums.TimestampPrecision.PICOSECOND
+    )
+    rows = list(rows)
+    row = rows[0]
+    assert row["pico_col"] == "2025-01-01T00:00:00.123456789012Z"
+
+
+def test_list_rows_pico_truncate(
+    bigquery_client: bigquery.Client, scalars_table_pico: str
+):
+    # For a picosecond timestamp column, if the user does not explicitly set
+    # timestamp_precision, will return truncated microsecond precision.
+    rows = bigquery_client.list_rows(scalars_table_pico)
+    rows = list(rows)
+    row = rows[0]
+    assert row["pico_col"] == "1735689600123456"

tests/system/test_query.py

Lines changed: 13 additions & 0 deletions

@@ -21,6 +21,7 @@
 import pytest

 from google.cloud import bigquery
+from google.cloud.bigquery import enums
 from google.cloud.bigquery.query import ArrayQueryParameter
 from google.cloud.bigquery.query import ScalarQueryParameter
 from google.cloud.bigquery.query import ScalarQueryParameterType

@@ -546,3 +547,15 @@ def test_session(bigquery_client: bigquery.Client, query_api_method: str):

     assert len(rows) == 1
     assert rows[0][0] == 5
+
+
+def test_query_picosecond(bigquery_client: bigquery.Client):
+    job = bigquery_client.query(
+        "SELECT CAST('2025-10-20' AS TIMESTAMP(12));",
+        api_method="QUERY",
+        timestamp_precision=enums.TimestampPrecision.PICOSECOND,
+    )
+
+    result = job.result()
+    rows = list(result)
+    assert rows[0][0] == "2025-10-20T00:00:00.000000000000Z"
