# Copyright 2017 Okera Inc. All Rights Reserved.
#
# Tests that should run on any configuration. The server auth can be specified
# via environment variables before running these tests.
# pylint: disable=bad-continuation
# pylint: disable=bad-indentation
import unittest
import numpy
from okera.tests import pycerebro_test_common as common
class BasicTest(unittest.TestCase):
    """End-to-end checks of planner scans against the sample datasets.

    Each test opens its own planner through ``common.get_planner()`` and
    compares the scan result with hard-coded expected values.
    """

    def test_sparse_data(self):
        """Scan rs.sparsedata into pandas and check size and column stats."""
        with common.get_planner() as planner:
            df = planner.scan_as_pandas("rs.sparsedata")
            self.assertEqual(96, len(df), msg=df)
            self.assertEqual(68, df['age'].count(), msg=df)
            self.assertEqual(10.0, df['age'].min(), msg=df)
            self.assertEqual(96.0, df['age'].max(), msg=df)
            # The expected value is a bytes literal, not str.
            self.assertEqual(b'sjc', df['defaultcity'].max(), msg=df)
            self.assertEqual(86, df['description'].count(), msg=df)

    def test_nulls(self):
        """A NULL column, and a function applied to it, must come back NaN."""
        with common.get_planner() as planner:
            df = planner.scan_as_pandas("select string_col from rs.alltypes_null")
            self.assertEqual(1, len(df), msg=df)
            self.assertTrue(numpy.isnan(df['string_col'][0]), msg=df)

            df = planner.scan_as_pandas(
                "select length(string_col) as c from rs.alltypes_null")
            self.assertEqual(1, len(df), msg=df)
            self.assertTrue(numpy.isnan(df['c'][0]), msg=df)

    def test_timestamp_functions(self):
        """date_add() output must stringify as the expected timestamp."""
        with common.get_planner() as planner:
            rows = planner.scan_as_json("""
select date_add('2009-01-01', 10) as c from okera_sample.sample""")
            # assertEqual (rather than assertTrue(len(...) == n)) reports the
            # actual count when the check fails.
            self.assertEqual(2, len(rows), msg=rows)
            self.assertEqual('2009-01-11 00:00:00+00:00', str(rows[0]['c']), msg=rows)
            self.assertEqual('2009-01-11 00:00:00+00:00', str(rows[1]['c']), msg=rows)

    def test_duplicate_cols(self):
        """Repeated output columns must be reachable under suffixed keys."""
        sample_text = 'This is a sample test file.'

        with common.get_planner() as planner:
            rows = planner.scan_as_json("""
select record, record from okera_sample.sample""")
            self.assertEqual(2, len(rows), msg=rows)
            self.assertEqual(sample_text, str(rows[0]['record']), msg=rows)
            self.assertEqual(sample_text, str(rows[0]['record_2']), msg=rows)

        with common.get_planner() as planner:
            # The explicit alias 'record_2' is already taken here, so the
            # third column is expected under 'record_2_2'.
            rows = planner.scan_as_json("""
select record, record as record_2, record from okera_sample.sample""")
            self.assertEqual(2, len(rows), msg=rows)
            self.assertEqual(sample_text, str(rows[0]['record']), msg=rows)
            self.assertEqual(sample_text, str(rows[0]['record_2']), msg=rows)
            self.assertEqual(sample_text, str(rows[0]['record_2_2']), msg=rows)

    def test_large_decimals(self):
        """Decimal values must stringify identically via JSON and pandas."""
        # Expected string form of the 'num' column, in result order.
        expected = [
            '9012248907891233.020304050670',
            '2343.999900000000',
            '900.000000000000',
            '32.440000000000',
            '54.230000000000',
            '4525.340000000000',
        ]

        with common.get_planner() as planner:
            rows = planner.scan_as_json("select num from rs.large_decimals2")
            self.assertEqual(len(expected), len(rows), msg=rows)
            for i, want in enumerate(expected):
                self.assertEqual(want, str(rows[i]['num']), msg=rows)

        with common.get_planner() as planner:
            df = planner.scan_as_pandas("select num from rs.large_decimals2")
            self.assertEqual(len(expected), len(df), msg=df)
            for i, want in enumerate(expected):
                self.assertEqual(want, str(df['num'][i]), msg=df)

    def test_scan_as_json_max_records(self):
        """scan_as_json must cap its result at max_records."""
        sql = "select * from okera_sample.sample"
        with common.get_planner() as planner:
            rows = planner.scan_as_json(sql, max_records=1, max_client_process_count=1)
            self.assertEqual(1, len(rows), msg='max_records not respected')

            # A limit above the number of returned rows must not change the result.
            rows = planner.scan_as_json(sql, max_records=100, max_client_process_count=1)
            self.assertEqual(2, len(rows), msg='max_records not respected')

    def test_scan_as_pandas_max_records(self):
        """scan_as_pandas must cap its result at max_records."""
        sql = "select * from okera_sample.sample"
        with common.get_planner() as planner:
            df = planner.scan_as_pandas(sql, max_records=1, max_client_process_count=1)
            self.assertEqual(1, len(df.index), msg='max_records not respected')

            # A limit above the number of returned rows must not change the result.
            df = planner.scan_as_pandas(sql, max_records=100, max_client_process_count=1)
            self.assertEqual(2, len(df.index), msg='max_records not respected')
# Run the whole suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()