Database Reference
In-Depth Information
publicdata:samples.wikipedia@1386465812000$0-of-3
publicdata:samples.wikipedia@1386465812000$1-of-3
publicdata:samples.wikipedia@1386465812000$2-of-3
Listing 12.6 is similar to Listing 12.5, except that it uses partition
decorators instead of dividing the table up by row ranges. Reading the table
this way will likely be much faster than using range-based indexing.
Listing 12.6: Reading from a table in parallel using partition
decorators (tabledata_partition.py)
import os
import sys
import threading
import time
# Imports from files in this directory:
from table_reader import TableReader
from table_reader import TableReadThread
def parallel_partitioned_read(partition_count,
project_id, dataset_id, table_id, output_dir):
snapshot_time = int(time.time() * 1000)
threads = []
for index in range(partition_count):
file_name = '%s.%d' % (os.path.join(output_dir,
table_id), index)
suffix ='$%d-of-%d' % (index, partition_count)
partition_table_id = '%s@%d%s' % (table_id,
snapshot_time, suffix)
thread_reader = TableReader(
project_id=project_id,
dataset_id=dataset_id,
table_id=partition_table_id)
read_thread = TableReadThread(
thread_reader,
file_name,
thread_id=suffix)
 
 
Search WWH ::




Custom Search