diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..61c8e0e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,79 @@ +name: CI + +on: + pull_request: + push: + branches: [main, develop] + +jobs: + test: + name: test (py${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install package and test deps + # `spatial_index` on PyPI has no wheels for Python 3.9+ and no + # source distribution, so `pip install .` cannot resolve it on the + # modern matrix. Install nexsciTAP with --no-deps and rely on the + # test fixture's spatial_index stub (tests/fixtures/stubs/) on + # PYTHONPATH for the constants TAP imports. configobj is the only + # actually-needed runtime dep and is in requirements-test.txt. + # ADQL is installed with --no-deps for the same reason. + run: | + pip install --upgrade pip + pip install -r requirements-test.txt + pip install --no-deps "ADQL==1.0.6" + pip install --no-deps . + + - name: Lint with ruff + run: ruff check . + + - name: Run tests + run: pytest tests/ -v + + wheel-platforms: + name: wheel build (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Build wheel + run: | + pip install --upgrade pip build + python -m build --wheel + + - name: Verify wheel artifact + # See test job for spatial_index PyPI situation. We can't install + # the wheel's full deps on modern Python; just confirm the wheel + # was built and includes the C extension. + run: | + ls dist/ + python -c " + import zipfile, glob + wheel = glob.glob('dist/*.whl')[0] + with zipfile.ZipFile(wheel) as z: + names = z.namelist() + assert any('writerecs' in n and n.endswith('.so') for n in names), \ + f'C extension missing from wheel: {names}' + print(f'wheel OK: {wheel}') + print('contains C extension') + " diff --git a/.gitignore b/.gitignore index fcd07d6..035818a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,15 @@ *.[oa] *~ +*.so /Sphinx/_build /TAP/__pycache__ -/TAP/writerecs.cpython-36m-darwin.so /build /dist /final_dist /final_wheel /nexsciTAP.egg-info +__pycache__/ +.pytest_cache/ # Workflow artifacts (Claude Code planning/spec docs, not project documentation) /docs/superpowers/ diff --git a/TAP/configparam.py b/TAP/configparam.py index 5f533b7..58701e8 100644 --- a/TAP/configparam.py +++ b/TAP/configparam.py @@ -3,11 +3,12 @@ # https://github.com/Caltech-IPAC/nexsciTAP/blob/master/LICENSE -import os import logging -import configobj +import os import pprint +import configobj + class configParam: @@ -31,7 +32,7 @@ class configParam: def __init__(self, path, **kwargs): - pp = pprint.PrettyPrinter(indent=3) + pprint.PrettyPrinter(indent=3) @@ -80,7 +81,7 @@ def __init__(self, path, **kwargs): logging.debug('') logging.debug('ConfigObj instantiated successfully') - + ### Web server config parameters ############ self.web = 'WEB' @@ -120,7 +121,7 @@ def __init__(self, path, **kwargs): # HTTP_URL (includingt HTTP_PORT) - self.httpurl = None + self.httpurl = None if ('HTTP_URL' in confobj[self.web]): self.httpurl = confobj[self.web]['HTTP_URL'] @@ -129,7 +130,7 @@ def __init__(self, path, **kwargs): self.msg = 'Failed to find HTTP_URL in config_file' raise Exception(self.msg) - self.port = None + self.port = None if('HTTP_PORT' in confobj[self.web]): self.port = confobj[self.web]['HTTP_PORT'] @@ -139,23 +140,23 @@ def __init__(self, path, **kwargs): # CGIPGM - self.cgipgm = None + self.cgipgm = None if ('CGI_PGM' in confobj[self.web]): self.cgipgm = confobj[self.web]['CGI_PGM'] # ARRAYSIZE - self.arraysize = 10000 + self.arraysize = 10000 if ('ArraySize' in confobj[self.web]): self.arraysize = confobj[self.web]['ArraySize'] # INFOMSG - self.infomsg = '' - if('INFOMSG' in confobj[self.web]): - self.infomsg = confobj[self.web]['INFOMSG'] + self.infomsg = '' + if('INFOMSG' in confobj[self.web]): + self.infomsg = confobj[self.web]['INFOMSG'] if self.debug: logging.debug('') @@ -165,10 +166,10 @@ def __init__(self, path, **kwargs): logging.debug('arraysize = %s', self.infomsg) - + ### Configuration ########################### - # If there was an input configuration 'instance', read 'db_connection' + # If there was an input configuration 'instance', read 'db_connection' # and 'sptind_config' section names from there. Otherwise use the # following defaults: @@ -186,7 +187,7 @@ def __init__(self, path, **kwargs): self.status = 'error' self.msg = 'Failed to find DB_CONNECTION in config_file' raise Exception(self.msg) - + self.sptind_config = None if('SPTIND_CONFIG' in confobj[self.instance]): @@ -222,9 +223,9 @@ def __init__(self, path, **kwargs): self.socket = None self.dbschema = None - self.cookiename = '' - self.accesstbl = '' - self.usertbl = '' + self.cookiename = '' + self.accesstbl = '' + self.usertbl = '' self.propfilter = '' self.fileid = '' self.accessid = '' @@ -247,7 +248,7 @@ def __init__(self, path, **kwargs): if self.debug: logging.debug('') logging.debug('dbms = %s', self.dbms) - + # ORACLE Connection @@ -309,15 +310,15 @@ def __init__(self, path, **kwargs): # KOA Proprietary Access parameters - self.cookiename = '' + self.cookiename = '' if('COOKIENAME' in confobj[self.db_connection]): self.cookiename = confobj[self.db_connection]['COOKIENAME'] - self.accesstbl = '' + self.accesstbl = '' if('ACCESS_TBL' in confobj[self.db_connection]): self.accesstbl = confobj[self.db_connection]['ACCESS_TBL'] - self.usertbl = '' + self.usertbl = '' if('USERS_TBL' in confobj[self.db_connection]): self.usertbl = confobj[self.db_connection]['USERS_TBL'] @@ -428,7 +429,7 @@ def __init__(self, path, **kwargs): self.msg = 'Failed to MySQL DBMS user ID in config_file' raise Exception(self.msg) - + # PASSWORD if ('Password' in confobj[self.db_connection]): @@ -490,7 +491,7 @@ def __init__(self, path, **kwargs): # DATABASE - + if 'DataBase' in confobj[self.db_connection]: self.database = confobj[self.db_connection]['DataBase'] @@ -511,7 +512,7 @@ def __init__(self, path, **kwargs): self.msg = 'Failed to find PostgreSQL username in config_file.' raise Exception(self.msg) - + # PASSWORD if 'Password' in confobj[self.db_connection]: @@ -686,5 +687,5 @@ def __init__(self, path, **kwargs): logging.debug(' accessid = ' + str(self.accessid)) logging.debug(' racol = ' + str(self.racol)) logging.debug(' deccol = ' + str(self.deccol)) - - return + + return diff --git a/TAP/datadictionary.py b/TAP/datadictionary.py index 3f556d7..7d27719 100644 --- a/TAP/datadictionary.py +++ b/TAP/datadictionary.py @@ -3,9 +3,10 @@ # https://github.com/Caltech-IPAC/nexsciTAP/blob/master/LICENSE -import os import logging +import os import re + from astropy.io import ascii @@ -92,8 +93,8 @@ def __init__(self, conn, table, connectInfo, **kwargs): if self.debug: logging.debug(f'dbtable = {self.dbtable:s}') - logging.debug(f'ddtbl = ' + str(self.ddtbl)) - logging.debug(f'ddfile = ' + str(self.ddfile)) + logging.debug('ddtbl = ' + str(self.ddtbl)) + logging.debug('ddfile = ' + str(self.ddfile)) # @@ -111,20 +112,20 @@ def __init__(self, conn, table, connectInfo, **kwargs): if self.debug: logging.debug('colnames: ' + str(tcolnames)) - i = 0 + i = 0 for trow in tdata: - + dbname = '' desc = '' if 'name' in tcolnames: dbname = trow['name'].lower() desc = dbname - + type = '' if 'intype' in tcolnames: type = trow['intype'] - + unit = '' if 'units' in tcolnames: unit = trow['units'] @@ -147,7 +148,7 @@ def __init__(self, conn, table, connectInfo, **kwargs): if len(dbname) > width: width = len(dbname) - + self.colname[i] = dbname i = i+1 @@ -182,7 +183,13 @@ def __init__(self, conn, table, connectInfo, **kwargs): else: placeholder = '?' - sql = "select * from " + self.connectInfo["tap_schema"] + "." + self.connectInfo["columns_table"] + " where lower(table_name) = " \ + # For SQLite the schema is ATTACHed under a name held in + # `tap_schema_file`; `tap_schema` is the file path. For + # Oracle/Postgres/MySQL `tap_schema` is the schema name itself. + schema_name = self.connectInfo.get("tap_schema_file") \ + or self.connectInfo["tap_schema"] + + sql = "select * from " + schema_name + "." + self.connectInfo["columns_table"] + " where lower(table_name) = " \ + placeholder if self.debug: @@ -302,7 +309,7 @@ def __init__(self, conn, table, connectInfo, **kwargs): # # { while loop # - + #rows = cursor.fetchmany(self.nfetch) rows = cursor.fetchall() @@ -314,7 +321,7 @@ def __init__(self, conn, table, connectInfo, **kwargs): i = 0 for row in rows: - + # # { for loop: each row in the file represents # a column in data dictionary diff --git a/TAP/propfilter.py b/TAP/propfilter.py index b73ca38..3a38fc2 100644 --- a/TAP/propfilter.py +++ b/TAP/propfilter.py @@ -3,16 +3,14 @@ # https://github.com/Caltech-IPAC/nexsciTAP/blob/master/LICENSE -import os import logging - -import datetime +import os import time -from TAP.writeresult import writeResult from TAP.datadictionary import dataDictionary from TAP.tablenames import TableNames -from TAP.tablevalidator import TableValidator, TableValidationError +from TAP.tablevalidator import TableValidationError, TableValidator +from TAP.writeresult import writeResult class propFilter: @@ -229,7 +227,7 @@ def __init__(self, **kwargs): logging.debug( 'password = [Not shown for security reasons].') # Change to the following to temporarily debug login - + # logging.debug(f'userid = {self.userid:s}') # logging.debug(f'password = {self.password:s}') @@ -389,7 +387,7 @@ def __init__(self, **kwargs): try: self.maxrec = int(maxrecstr) - except Exception as e: + except Exception: self.msg = "Failed to convert input maxrec value [" + \ maxrecstr + "] to integer." @@ -438,7 +436,7 @@ def __init__(self, **kwargs): logging.debug('Connected to Oracle, database ' + self.dbserver) - except Exception as e: + except Exception: self.status = 'error' self.msg = 'Failed to connect to cx_Oracle' @@ -497,7 +495,7 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('TAP_SCHEMA attached') - except Exception as e: + except Exception: self.status = 'error' self.msg = 'Failed to connect to SQLite3 databases' @@ -749,8 +747,8 @@ def __init__(self, **kwargs): " where " + self.fileid + " in(select " + self.fileid_allowed + \ " from " + self.tmp_fileidAlloweddbtbl + ")" - if(len(self.wherestr) > 0): - sql = sql + ' and (' + self.wherestr[6:] + ')' + if(len(self.wherestr) > 0): + sql = sql + ' and (' + self.wherestr[6:] + ')' if self.debug: logging.debug('') @@ -846,7 +844,7 @@ def __init__(self, **kwargs): coldesc=self.coldesc, racol=self.racol, deccol=self.deccol) - + #deccol=self.deccol, #debug=self.debug) @@ -863,7 +861,7 @@ def __init__(self, **kwargs): if self.debug: logging.debug('') - logging.debug(f'writeResult cursor closed') + logging.debug('writeResult cursor closed') self.outpath = wresult.outpath @@ -871,14 +869,14 @@ def __init__(self, **kwargs): # # Drop all tmp DB tables for oracle because oracle v.12.xxx's - # global temporary table is permanent, not really temporary + # global temporary table is permanent, not really temporary # if (self.dbms.lower() == 'oracle'): - + try: self.conn.close() - + if self.debug: logging.debug ('') logging.debug ('returned from oracle conn.close()') @@ -888,12 +886,12 @@ def __init__(self, **kwargs): if self.debug: logging.debug ('') logging.debug (f'conn.close exception: {str(e):s}') - + # # re-connect # - time.sleep (2.0) - + time.sleep (2.0) + userid = self.connectInfo['userid'] dbserver = self.connectInfo['dbserver'] password = self.connectInfo['password'] @@ -902,11 +900,11 @@ def __init__(self, **kwargs): if self.debug: logging.debug ('') - logging.debug (f'xxx0') + logging.debug ('xxx0') logging.debug (f'userid= {userid:s}') logging.debug (f'password= {password:s}') logging.debug (f'dbserver= {dbserver:s}') - + try: self.conn = cx_Oracle.connect ( \ userid, \ @@ -930,25 +928,25 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('Failed to re-connected to Oracle ') logging.debug(f'e= {str(e):s}') - - pass - + + pass + try: self.__dropDbtbl__(self.tmp_fileidAlloweddbtbl) - + if self.debug: logging.debug('') logging.debug( \ 'returned dropDbtbl: tmp_fileidAlloweddbtbl') except Exception as e: - + if self.debug: logging.debug('') logging.debug( \ f'drop fileidAlloweddbtbl exception: {str(e):s}') pass - + if self.debug: logging.debug('') logging.debug('tmp_fileidAlloweddbtbl dropped') @@ -962,14 +960,14 @@ def __init__(self, **kwargs): try: self.__dropDbtbl__(self.tmp_accessiddbtbl) - + if self.debug: logging.debug('') logging.debug( \ 'returned dropDbtbl: tmp_accessiddbtbl') except Exception as e: - + if self.debug: logging.debug('') logging.debug( \ @@ -1295,8 +1293,8 @@ def __validateUser__(self, cookiename, cookiestr, propfilter, # if self.debug: - logging.debug(f'cookiename = ' + str(cookiename)) - logging.debug(f'cookiestr = ' + str(cookiestr)) + logging.debug('cookiename = ' + str(cookiename)) + logging.debug('cookiestr = ' + str(cookiestr)) msg = '' ind = cookiestr.find(cookiename) @@ -1325,17 +1323,17 @@ def __validateUser__(self, cookiename, cookiestr, propfilter, return arr = substr1.split('|') - narr = len(arr) + len(arr) self.userid = arr[0] self.encodedpass = arr[1] if self.debug: logging.debug('') - logging.debug(f'substr1 = ' + str(substr1)) - logging.debug(f'arr = ' + str(arr)) - logging.debug(f'userid = ' + str(self.userid)) - logging.debug(f'encodedpass = ' + str(self.encodedpass)) + logging.debug('substr1 = ' + str(substr1)) + logging.debug('arr = ' + str(arr)) + logging.debug('userid = ' + str(self.userid)) + logging.debug('encodedpass = ' + str(self.encodedpass)) if(self.userid == 'anon'): self.userid = '' @@ -1397,8 +1395,8 @@ def __validateUser__(self, cookiename, cookiestr, propfilter, if self.debug: logging.debug('') - logging.debug(f'password = [Not shown for security reasons.]') - # Change to following to debug password + logging.debug('password = [Not shown for security reasons.]') + # Change to following to debug password # logging.debug(f'password = {password:s}') if(len(password) == 0): @@ -1589,7 +1587,7 @@ def __createTmpAccessiddb__(self, tmp_accessiddbtbl, userid, accessid, # { createTmpAccessiddb # # - # first drop tmp_accessiddbtbl in case table with the same name + # first drop tmp_accessiddbtbl in case table with the same name # might already exist # @@ -1601,8 +1599,8 @@ def __createTmpAccessiddb__(self, tmp_accessiddbtbl, userid, accessid, if self.debug: logging.debug('') logging.debug('returned dropDbtbl') - logging.debug(f'temp dbtbl dropped') - + logging.debug('temp dbtbl dropped') + except Exception as e: self.msg = 'Failed to create tmp_accessiddbtbl: ' + str(e) @@ -1644,7 +1642,7 @@ def __createTmpAccessiddb__(self, tmp_accessiddbtbl, userid, accessid, logging.debug('') logging.debug('tmp_accessiddbtbl created') - + # Insert into tmp_accessiddbtbl: select accessid allowed # by userid: accessidtbl @@ -1670,8 +1668,8 @@ def __createTmpAccessiddb__(self, tmp_accessiddbtbl, userid, accessid, raise Exception(self.msg) - # check rowcount in tmp_accessiddbtbl - + # check rowcount in tmp_accessiddbtbl + sql = "select * from " + tmp_accessiddbtbl if self.debug: @@ -1715,10 +1713,10 @@ def __createTmpFileiddb__(self, tmp_fileiddbtbl, fileid, fileid_allowed, if self.debug: logging.debug('') - logging.debug(f'Enter createTmpFileiddb') + logging.debug('Enter createTmpFileiddb') if (self.dbms.lower() == 'pgsql'): - + sql = "create temporary table " + tmp_fileiddbtbl + \ "(" + fileid_allowed + " varchar(35)) on commit preserve rows" else: @@ -1753,21 +1751,21 @@ def __createTmpFileiddb__(self, tmp_fileiddbtbl, fileid, fileid_allowed, if(self.propfilter == 'koa'): if(len(self.userid) > 0): - + if (self.dbms.lower() == 'oracle'): access_constraint = \ "((current_date > add_months(date_obs, propint))" + \ " or(lower(" + accessid + ") in(select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + elif (self.dbms.lower() == 'pgsql'): access_constraint = "((current_date > " + \ "(date_obs + (propint * '1 month'::interval)))" + \ " or (lower(" + accessid + ") in (select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + else: if (self.dbms.lower() == 'oracle'): @@ -1779,7 +1777,7 @@ def __createTmpFileiddb__(self, tmp_fileiddbtbl, fileid, fileid_allowed, access_constraint = "(current_date > " + \ "(date_obs + (propint * '1 month'::interval)))" - + if self.debug: logging.debug('') logging.debug( @@ -1802,20 +1800,20 @@ def __createTmpFileiddb__(self, tmp_fileiddbtbl, fileid, fileid_allowed, "add_months(obsdate, l0propint))" + \ " or(lower(" + accessid + ") in(select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + elif (self.dbms.lower() == 'pgsql'): access_constraint = "((current_date > " + \ "(obsdate + (l0propint * '1 month'::interval)))" + \ " or (lower(" + accessid + ") in (select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + else: if (self.dbms.lower() == 'oracle'): access_constraint = \ "(current_date > add_months(obsdate, l0propint))" - + elif (self.dbms.lower() == 'pgsql'): access_constraint = "(current_date > " + \ @@ -1832,20 +1830,20 @@ def __createTmpFileiddb__(self, tmp_fileiddbtbl, fileid, fileid_allowed, "add_months(obsdate, l1propint))" + \ " or(lower(" + accessid + ") in(select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + elif (self.dbms.lower() == 'pgsql'): access_constraint = "((current_date > " + \ "(obsdate + (l1propint * '1 month'::interval)))" + \ " or (lower(" + accessid + ") in (select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + else: if (self.dbms.lower() == 'oracle'): access_constraint = \ "(current_date > add_months(obsdate, l1propint))" - + elif (self.dbms.lower() == 'pgsql'): access_constraint = "(current_date > " + \ @@ -1861,20 +1859,20 @@ def __createTmpFileiddb__(self, tmp_fileiddbtbl, fileid, fileid_allowed, "add_months(obsdate, l2propint))" + \ " or(lower(" + accessid + ") in(select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + elif (self.dbms.lower() == 'pgsql'): access_constraint = "((current_date > " + \ "(obsdate + (l2propint * '1 month'::interval)))" + \ " or (lower(" + accessid + ") in (select " + \ accessid + " from " + tmp_accessiddbtbl + ")))" - + else: if (self.dbms.lower() == 'oracle'): access_constraint = \ "(current_date > add_months(obsdate, l2propint))" - + elif (self.dbms.lower() == 'pgsql'): access_constraint = "(current_date > " + \ @@ -1916,7 +1914,7 @@ def __createTmpFileiddb__(self, tmp_fileiddbtbl, fileid, fileid_allowed, logging.debug(f'{self.msg:s}') raise Exception(self.msg) - + rowcnt_conditional = cursor.rowcount if self.debug: logging.debug('') @@ -2119,13 +2117,13 @@ def __dropDbtbl__(self, dbtable, **kwargs): logging.debug(f'table {dbtable:s} successfully dropped') except Exception as e: - + if self.debug: logging.debug('') logging.debug(f'drop table exception: {str(e):s}') - + pass - + return # diff --git a/TAP/tablenames.py b/TAP/tablenames.py index a3f230d..1ea9106 100644 --- a/TAP/tablenames.py +++ b/TAP/tablenames.py @@ -4,10 +4,10 @@ import itertools -import sqlparse -from sqlparse.sql import IdentifierList, Identifier -from sqlparse.tokens import Keyword, DML +import sqlparse +from sqlparse.sql import Identifier, IdentifierList +from sqlparse.tokens import DML, Keyword class TableNames: diff --git a/TAP/tablevalidator.py b/TAP/tablevalidator.py index e2d25ca..74236b7 100644 --- a/TAP/tablevalidator.py +++ b/TAP/tablevalidator.py @@ -32,7 +32,15 @@ def __init__(self, conn, connectInfo=None, debug=0): self.tables_table = 'tables' if connectInfo is not None: - if 'tap_schema' in connectInfo: + # `tap_schema_file` is the ATTACH-AS name for SQLite ATTACHed + # schemas (set in tapquery.py:395). For Oracle/Postgres/MySQL + # there's no ATTACH and `tap_schema` is the schema name itself. + # For SQLite, `tap_schema` is the FILE PATH and using it directly + # in a `SELECT ... FROM .tables` reference produces a SQL + # syntax error. Prefer the more-specific key when it's set. + if 'tap_schema_file' in connectInfo: + self.tap_schema = connectInfo['tap_schema_file'] + elif 'tap_schema' in connectInfo: self.tap_schema = connectInfo['tap_schema'] if 'tables_table' in connectInfo: self.tables_table = connectInfo['tables_table'] diff --git a/TAP/tap.py b/TAP/tap.py index c2102ae..ef6a4d8 100755 --- a/TAP/tap.py +++ b/TAP/tap.py @@ -2,34 +2,29 @@ # This code is released with a BSD 3-clause license. License information is at # https://github.com/Caltech-IPAC/nexsciTAP/blob/master/LICENSE -import os -import sys +import cgi +import datetime import fcntl import html - import logging - -import datetime -import time +import math +import os import signal -import math - -import cgi +import sys import tempfile +import time import xmltodict -from bs4 import BeautifulSoup - from ADQL.adql import ADQL - +from bs4 import BeautifulSoup from spatial_index import SpatialIndex -from TAP.tapquery import tapQuery from TAP.configparam import configParam from TAP.propfilter import propFilter from TAP.tablenames import TableNames -from TAP.vositables import vosiTables from TAP.tablevalidator import TableValidationError, TableValidator +from TAP.tapquery import tapQuery +from TAP.vositables import vosiTables class Tap: @@ -230,7 +225,7 @@ def __run__(self, **kwargs): self.uwsheader = '' - + for key in self.form: if self.debug: logging.debug(f' key: {key:<15} val: {self.form[key].value:s}') @@ -244,7 +239,7 @@ def __run__(self, **kwargs): if(key.lower() == 'lang'): self.lang = self.form[key].value self.param['lang'] = self.form[key].value - + if(key.lower() == 'request'): self.param['request'] = self.form[key].value @@ -305,7 +300,7 @@ def __run__(self, **kwargs): self.maxrec = int(maxrec_dbl) - except Exception as e: + except Exception: self.msg = "Failed to convert input maxrec value [" + \ self.param['maxrec'] + "] to integer." @@ -314,7 +309,7 @@ def __run__(self, **kwargs): if (len(self.lang) == 0): self.lang = 'ADQL' - self.param['lang'] = 'ADQL' + self.param['lang'] = 'ADQL' self.nparam = len(self.param) @@ -354,11 +349,11 @@ def __run__(self, **kwargs): self.configext = arr[0] arr = arr[1:] narr = len(arr) - + if self.debug: logging.debug('') logging.debug(f'id= {self.id:s} configext= {self.configext:s}') - + if(arr[0] == 'async'): self.tapcontext = 'async' @@ -370,7 +365,7 @@ def __run__(self, **kwargs): self.tapcontext = 'capabilities' elif (arr[0] == 'tables'): self.tapcontext = 'tables' - + if (len(self.tapcontext) == 0): self.msg = 'PATH_INFO: sync or async not found.' self.__printError__('votable', self.msg, errcode='404') @@ -382,17 +377,17 @@ def __run__(self, **kwargs): if(narr > 1 and len(arr[1]) > 0): # - #{ narr > 1: + #{ narr > 1: # input query contains more than TAP/(sync/async)/statuskey for - # retrive status.xml, we set getstatus=1; - # + # retrive status.xml, we set getstatus=1; + # self.getstatus = 1 self.id = arr[1] if self.debug: logging.debug('') logging.debug(f'id= {self.id:s} getstatus= {self.getstatus:d}') - + if (len(self.id) == 0): self.msg = 'Failed to find jobid for getStatus request.' self.__printError__('votable', self.msg, errcode='404') @@ -400,7 +395,7 @@ def __run__(self, **kwargs): len_id = len(self.id) ind = self.pathinfo.find(self.id) - + i = ind + len_id + 1 self.statuskey = self.pathinfo[i:] @@ -423,10 +418,10 @@ def __run__(self, **kwargs): logging.debug(f'setstatus = {self.setstatus:d}') logging.debug(f'id = {self.id:s}') # - #} end parsing pathinfo for narr > 1: + #} end parsing pathinfo for narr > 1: # input query contains more than TAP/(sync/async)/statuskey for - # retrive status.xml, we set getstatus=1; - # + # retrive status.xml, we set getstatus=1; + # self.cookiestr = os.getenv('HTTP_COOKIE', default='') @@ -436,7 +431,7 @@ def __run__(self, **kwargs): logging.debug(f'cookiestr (http) = {self.cookiestr:s}') if (len(self.cookiestr) == 0): - + self.cookiestr = self.token if self.debug: logging.debug('') @@ -475,10 +470,10 @@ def __run__(self, **kwargs): self.config = None try: self.config = configParam(self.configpath, instance=self.instance, debug=self.debug) - + if self.debug: logging.debug('') - logging.debug(f'returned configParam:') + logging.debug('returned configParam:') logging.debug('%s', self.config) except Exception as e: @@ -486,7 +481,7 @@ def __run__(self, **kwargs): if self.debug: logging.debug('') logging.debug(f'config exception: {str(e):s}') - + self.msg = 'Internal system error: config variables read error.' self.__printError__('votable', str(e), errcode='500') @@ -500,7 +495,7 @@ def __run__(self, **kwargs): logging.debug('') logging.debug(f'workdir = {self.workdir:s}') - + self.arraysize = self.config.arraysize self.cookiename = self.config.cookiename @@ -528,7 +523,7 @@ def __run__(self, **kwargs): - # Special case: HTTP DELETE. We only need + # Special case: HTTP DELETE. We only need # the REQUEST_METHOD and PATH_INFO environment # variable. The first must be DELETE and the # second is the directory of th TAP job we wish @@ -548,7 +543,7 @@ def __run__(self, **kwargs): # logging.debug('') # logging.debug(f'DELETE: {delete_dir:s}') - + # # Initialize statdict dict # @@ -570,7 +565,7 @@ def __run__(self, **kwargs): self.statdict['destruction'] = '' self.statdict['endtime'] = '' self.statdict['duration'] = 0 - + self.statdict['stime'] = None self.statdict['resulturl'] = '' @@ -588,7 +583,7 @@ def __run__(self, **kwargs): # # { Make workspace: - # make TAP subdir if it doesn't exist, + # make TAP subdir if it doesn't exist, # make a workspace name with unique id # @@ -639,7 +634,7 @@ def __run__(self, **kwargs): else: # - # { input jobid exists: + # { input jobid exists: # Retrieve workspace from id # @@ -667,14 +662,14 @@ def __run__(self, **kwargs): # #{ if tapcontext is one of vosiEnpoint, take care of take care of VOSI - # output and return + # output and return # if (self.tapcontext == 'availability'): self.__printVosiAvailability__ () if (self.tapcontext == 'capabilities'): self.__printVosiCapability__ () - + # # vositable: make up vositbl filepath # @@ -686,11 +681,11 @@ def __run__(self, **kwargs): logging.debug(f'vosipath = {self.vosipath:s}') if (self.tapcontext == 'tables'): - + if self.debug: - + logging.debug('') - logging.debug(f'call printVosiTables:') + logging.debug('call printVosiTables:') logging.debug(f'statuspath = {self.statuspath:s}') self.__printVosiTables__ (self.config.connectInfo, \ @@ -702,7 +697,7 @@ def __run__(self, **kwargs): # #} end vosiEnpoint outputs # - + # # Make up status file name # @@ -720,7 +715,7 @@ def __run__(self, **kwargs): # # If async and phase == PENDING: send 303 with statusurl and exit # - # If async and phase != RUN and != ABORT, i.e. bogus value: + # If async and phase != RUN and != ABORT, i.e. bogus value: # return with 400 client error immediately. # @@ -740,13 +735,13 @@ def __run__(self, **kwargs): if ((self.tapcontext == 'async') and \ (self.getstatus == 0)): - + if (len(self.param['phase']) == 0): # # { If phase not specified: set to PENDING and exit # - + if self.debug: logging.debug('') logging.debug('case: set to PENDING') @@ -771,23 +766,23 @@ def __run__(self, **kwargs): # # } end of PENDING case # - + elif (self.param['phase'].lower() == 'abort'): # # { If phase is abort # - + if self.debug: logging.debug('') logging.debug('case bad phase input: abort') self.msg = "There is no existing job to be aborted." - + self.statdict['phase'] = 'ABORTED' self.statdict['jobid'] = self.workspace self.statdict['errmsg'] = self.msg - + """ self.__printError__('votable', self.msg, errcode='400') @@ -811,14 +806,14 @@ def __run__(self, **kwargs): logging.debug('Return HTTP redirect status.xml and exit.') sys.exit() - + elif ((self.param['phase'].lower() != 'run') and \ (self.param['phase'].lower() != 'abort')): - + # # { If phase is bogus value # - + if self.debug: logging.debug('') logging.debug('case bad phase input:') @@ -828,7 +823,7 @@ def __run__(self, **kwargs): self.statdict['jobid'] = self.workspace self.statdict['errmsg'] = self.msg - + """ self.__printError__('votable', self.msg, errcode='400') @@ -838,7 +833,7 @@ def __run__(self, **kwargs): sys.exit() """ - + self.__writeStatusMsg__(self.statuspath, self.statdict, self.param) @@ -858,41 +853,41 @@ def __run__(self, **kwargs): # # } end of async and getstatus=0 case # - - + + if ((self.tapcontext == 'async') and (self.getstatus == 1)): # #{ async and getstatus=1 case: # - + if (len(self.param['phase']) == 0): # - #{ async getStatus case: tap query includes jobid but - # input phase is blank + #{ async getStatus case: tap query includes jobid but + # input phase is blank # if self.debug: logging.debug('') logging.debug('case: getStatus') - + try: self.__getStatus__(self.workdir, self.id, self.statuskey, \ self.param) except Exception as e: - + self.__printError__(self.format, str(e)) - + # # } end getStatus and printError will exit when done # - + else: - # - # {Tap input with jobid and phase: need to + # + # {Tap input with jobid and phase: need to # parse statuspath to retrieve input parameters # if self.debug: logging.debug('') - logging.debug (f'case: retrieve parameters from workspace') + logging.debug ('case: retrieve parameters from workspace') logging.debug (f'statuspath= {self.statuspath:s}') xmlstruct = None @@ -968,10 +963,10 @@ def __run__(self, **kwargs): doc = xmltodict.parse (xmlstruct) job = doc['uws:job'] - + if self.debug: logging.debug ('') - logging.debug (f'job= ') + logging.debug ('job= ') logging.debug (job) self.statdict['jobid'] = job['uws:jobId'] @@ -986,24 +981,24 @@ def __run__(self, **kwargs): if self.debug: logging.debug ('') - logging.debug (f'param retrieved from status.xml:') - logging.debug (f'jobid: ') + logging.debug ('param retrieved from status.xml:') + logging.debug ('jobid: ') logging.debug (self.statdict["jobid"]) - logging.debug (f'process_id:') + logging.debug ('process_id:') logging.debug (self.statdict["process_id"]) - logging.debug (f'ownerId:') + logging.debug ('ownerId:') logging.debug (self.statdict["ownerId"]) - logging.debug (f'phase:') + logging.debug ('phase:') logging.debug (self.statdict["phase"]) - logging.debug (f'quote:') + logging.debug ('quote:') logging.debug (self.statdict["quote"]) - logging.debug (f'starttime:') + logging.debug ('starttime:') logging.debug (self.statdict["starttime"]) - logging.debug (f'endtime:') + logging.debug ('endtime:') logging.debug (self.statdict["endtime"]) - logging.debug (f'destruction:') + logging.debug ('destruction:') logging.debug (self.statdict["destruction"]) - logging.debug (f'duration:') + logging.debug ('duration:') logging.debug (self.statdict["duration"]) # @@ -1012,8 +1007,8 @@ def __run__(self, **kwargs): # #} end async and getstatus=1 case # - - + + # # Make result table names # @@ -1101,7 +1096,7 @@ def __run__(self, **kwargs): # self.__printAsyncResponse__(self.statusurl) - + if self.debug: logging.debug('') logging.debug(f'returned printAsyncResponse') @@ -1110,7 +1105,7 @@ def __run__(self, **kwargs): # # } end async RUN case # - + elif (self.param['phase'] == 'ABORT'): # # { async ABORT case @@ -1125,8 +1120,8 @@ def __run__(self, **kwargs): if self.debug: logging.debug('') - logging.debug(f'currently executing:') - + logging.debug('currently executing:') + pid = self.statdict['process_id'] if self.debug: logging.debug('') @@ -1135,8 +1130,8 @@ def __run__(self, **kwargs): try: os.kill (pid, signal.SIGKILL) - except Exception as e: - + except Exception: + if self.debug: logging.debug('') logging.debug(f'Error abort job: pid = {pid:d}') @@ -1152,97 +1147,97 @@ def __run__(self, **kwargs): destructtime = datetime.datetime.now() destruction = \ destructtime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4] - + etime = datetime.datetime.now() endtime = etime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4] if self.debug: logging.debug('') - logging.debug(f'got here0-3') - logging.debug(f'etime:') + logging.debug('got here0-3') + logging.debug('etime:') logging.debug(etime) - + starttimestr = self.statdict['starttime'] - + stime = datetime.datetime.strptime \ (starttimestr, '%Y-%m-%dT%H:%M:%S.%f') if self.debug: logging.debug('') - logging.debug(f'stime:') + logging.debug('stime:') logging.debug(stime) - - durationtime = etime - stime + + durationtime = etime - stime if self.debug: logging.debug('') - logging.debug(f'got here0-4') - logging.debug(f'durationtime:') + logging.debug('got here0-4') + logging.debug('durationtime:') logging.debug(durationtime) - + durationstr = str(durationtime.total_seconds())[:50] if self.debug: logging.debug('') - logging.debug(f'got here0-5') + logging.debug('got here0-5') logging.debug(f'durationstr= {durationstr:s}') - + duration_dbl = float(durationstr) if self.debug: logging.debug('') - logging.debug(f'got here0-6') - + logging.debug('got here0-6') + duration = math.ceil(duration_dbl) if self.debug: logging.debug('') - logging.debug(f'got here0-7') - + logging.debug('got here0-7') + self.statdict['endtime'] = endtime if self.debug: logging.debug('') - logging.debug(f'got here0-8') - + logging.debug('got here0-8') + self.statdict['duration'] = duration if self.debug: logging.debug('') - logging.debug(f'got here0-9') - + logging.debug('got here0-9') + self.statdict['destruction'] = destruction if self.debug: logging.debug('') - logging.debug(f'got here0-10') - + logging.debug('got here0-10') + self.statdict['phase'] = 'ABORTED' - self.statdict['errmsg'] = '' + self.statdict['errmsg'] = '' if self.debug: logging.debug('') - logging.debug(f'got here0-11') - + logging.debug('got here0-11') + """ self.__writeStatusMsg__(self.statuspath, self.statdict, self.param) self.__printAsyncResponse__(self.statusurl) - + if self.debug: logging.debug('') logging.debug(f'returned printAsyncResponse') """ - + elif (self.statdict['phase'] == 'PENDING'): errmsg = 'PENDING job aborted' if self.debug: logging.debug('') - logging.debug(f'call writeAsyncError:') + logging.debug('call writeAsyncError:') logging.debug(f'errmsg= {errmsg:s}') - - + + self.statdict['starttime'] = '' self.statdict['endtime'] = '' - self.statdict['duration'] = 0 - + self.statdict['duration'] = 0 + self.statdict['phase'] = 'ABORTED' self.statdict['errmsg'] = errmsg @@ -1251,7 +1246,7 @@ def __run__(self, **kwargs): self.param) self.__printAsyncResponse__(self.statusurl) - + if self.debug: logging.debug('') logging.debug(f'returned printAsyncResponse') @@ -1263,7 +1258,7 @@ def __run__(self, **kwargs): else: # - # { any other bogus values: this only occured in the initial + # { any other bogus values: this only occured in the initial # PENDING case; the other case that the client send in bogus # case first time around would have been returned by a HTTP # 400 error immediately. @@ -1273,14 +1268,14 @@ def __run__(self, **kwargs): logging.debug ('case bogus phase value') errmsg = "Unknown input job phase=" + self.param['phase'] - + self.statdict['starttime'] = '' self.statdict['endtime'] = '' - self.statdict['duration'] = 0 - + self.statdict['duration'] = 0 + self.statdict['phase'] = 'ERROR' self.statdict['errmsg'] = errmsg - + # # } end async bogus value case # @@ -1299,30 +1294,30 @@ def __run__(self, **kwargs): logging.debug ('call printAsyncResponse') self.__printAsyncResponse__(self.statusurl) - + if self.debug: logging.debug('') - logging.debug(f'returned printAsyncResponse') + logging.debug('returned printAsyncResponse') # - # } end async submit case + # } end async submit case # # - # { Before running query -- both sync and async cases, check - # input parameters to reject obvious input errors - # - + # { Before running query -- both sync and async cases, check + # input parameters to reject obvious input errors + # + if self.debug: logging.debug('') - logging.debug(f'continue: check lang') + logging.debug('continue: check lang') if (self.param['lang'].lower() != 'adql' and \ self.param['lang'].lower() != 'adql-2.0'): - + self.msg = "Input parameter (lang=" + self.param['lang'] + \ - ") error: only lang=ADQL is implemented." - + ") error: only lang=ADQL is implemented." + if(self.tapcontext == 'async'): self.__writeAsyncError__(self.msg, self.statuspath, self.statdict, self.param) @@ -1333,10 +1328,10 @@ def __run__(self, **kwargs): if ((self.tapcontext == 'async') and \ (self.param['phase'].lower() != 'run') and\ (self.param['phase'].lower() != 'abort')): - + self.statdict['phase'] = 'ERROR' self.statdict['jobid'] = self.workspace - + self.msg = "Unknown input job phase=" + self.param['phase'] self.__writeAsyncError__(self.msg, self.statuspath, @@ -1346,7 +1341,7 @@ def __run__(self, **kwargs): if self.debug: logging.debug('') - logging.debug(f'continue: check format') + logging.debug('continue: check format') if ((self.format != 'votable') and \ (self.format != 'ipac') and \ @@ -1372,7 +1367,7 @@ def __run__(self, **kwargs): if self.debug: logging.debug('') - logging.debug(f'continue: check maxrec') + logging.debug('continue: check maxrec') logging.debug(f'self.maxrecstr= {self.maxrecstr:s}') @@ -1541,7 +1536,7 @@ def __run__(self, **kwargs): if self.debug: logging.debug('') - logging.debug(f'ADQL initialized') + logging.debug('ADQL initialized') self.query = adql.sql(query_adql) @@ -1575,7 +1570,7 @@ def __run__(self, **kwargs): tables = tn.extract_tables(self.query) self.dbtable = tables[0] - except Exception as e: + except Exception: if self.debug: logging.debug('') logging.debug('TableName exception') @@ -1584,9 +1579,9 @@ def __run__(self, **kwargs): if len(self.dbtable) == 0: self.msg = 'No table name found in ADQL query.' - + if(self.tapcontext == 'async'): - + self.__writeAsyncError__(self.msg, self.statuspath, self.statdict, self.param) else: @@ -1694,7 +1689,7 @@ def __run__(self, **kwargs): if self.debug: logging.debug('') - logging.debug(f'tapQuery parameters:') + logging.debug('tapQuery parameters:') logging.debug(f'query = [{self.query:s}]') logging.debug(f'workdir = [{self.userWorkdir:s}]') logging.debug(f'format = [{self.format:s}]') @@ -1830,7 +1825,7 @@ def __run__(self, **kwargs): etime = datetime.datetime.now() durationtime = etime - self.statdict['stime'] - + if self.debug: logging.debug('') logging.debug('durationtime:') @@ -1845,7 +1840,7 @@ def __run__(self, **kwargs): if self.debug: logging.debug('') logging.debug(f'duration_dbl= {duration_dbl:f}') - + duration = math.ceil(duration_dbl) if self.debug: @@ -2020,7 +2015,7 @@ def __printStatus__(self, key, retval, outtype, **kwargs): # # Header # - + retvalstr = str(retval) if self.debug: logging.debug('') @@ -2034,16 +2029,16 @@ def __printStatus__(self, key, retval, outtype, **kwargs): uwsschema = ' xmlns:uws="http://www.ivoa.net/xml/UWS/v1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="http://www.ivoa.net/xml/UWS/v1.0 http://www.ivoa.net/xml/UWS/v1.0">' - + if ((key == 'error') or (key == 'errorSummary') or (key == 'errmsg')): - + if(outtype == 'xml'): - self.__printError__ ('votable', retval) + self.__printError__ ('votable', retval) else: - self.__printError__ ('plain', retval) - + self.__printError__ ('plain', retval) + # # parameters and results vosi endpoints # @@ -2054,40 +2049,40 @@ def __printStatus__(self, key, retval, outtype, **kwargs): print("Content-type: text/xml\r") print("\r") print('') - + if(key == 'parameters'): - + if self.debug: logging.debug('') logging.debug('key = parameters') - + ind = retvalstr.index('>') if self.debug: logging.debug('') logging.debug('ind=') logging.debug(ind) - + if self.debug: logging.debug('') logging.debug(f'retvalstr[0:ind]= {retvalstr[0:ind-1]:s}') logging.debug(f'retvalstr[ind+1]= {retvalstr[ind+1]:s}') - - val = retvalstr[0:ind] + uwsschema + retvalstr[ind+1:] + + val = retvalstr[0:ind] + uwsschema + retvalstr[ind+1:] if self.debug: logging.debug('') logging.debug(f'val= {val:s}') - + print(val) sys.stdout.flush() elif(key == 'results'): - + ind = retvalstr.index('>') - val = retvalstr[0:ind] + uwsschema + retvalstr[ind+1:] + val = retvalstr[0:ind] + uwsschema + retvalstr[ind+1:] if self.debug: logging.debug('') logging.debug(f'val= {val:s}') - + print(val) sys.stdout.flush() @@ -2097,10 +2092,10 @@ def __printStatus__(self, key, retval, outtype, **kwargs): print (retval) sys.stdout.flush() - + if self.debug: logging.debug('Write status to user and exit.') - + sys.exit() # # } end of printStatus @@ -2112,7 +2107,7 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): # # { initialize format to votable but after extracting format - # the output should follow the format + # the output should follow the format # if self.debug: logging.debug('') @@ -2120,7 +2115,7 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): logging.debug('key= ') logging.debug(key) - + isExist = os.path.exists(self.statuspath) if(isExist == 0): @@ -2219,13 +2214,13 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): logging.debug('') logging.debug('key == parameters') logging.debug('call __printStatus') - + self.__printStatus__('parameters', parameters, 'xml') # # } end key=parameters # - + elif ((key == 'phase') or (key == 'startTime') or (key == 'endTime') @@ -2246,7 +2241,7 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): if self.debug: logging.debug('') logging.debug(\ - f'key == phase, executionduration or destruction etc.') + 'key == phase, executionduration or destruction etc.') # # { Single value return # @@ -2296,8 +2291,8 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): if self.debug: logging.debug('') - logging.debug(f'key == error') - + logging.debug('key == error') + retval = '' errmsg = '' @@ -2305,11 +2300,11 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): if self.debug: logging.debug('') - logging.debug(f'key == error') + logging.debug('key == error') try: errmsg = job['uws:errorSummary']['uws:message'] - except Exception as e: + except Exception: self.printError (\ self.format, 'error extracting errorSummary') @@ -2318,17 +2313,17 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): logging.debug(f'errmsg: {errmsg:s}') if (self.format == 'votable'): - + if (len(errmsg) > 0): outstr = f' {errmsg:s}' else: - outstr = f' ' + outstr = ' ' self.__printStatus__('errorSummary', outstr, 'xml') - + else: if (len(errmsg) > 0): - outstr = errmsg + outstr = errmsg else: outstr = 'No error message' @@ -2351,7 +2346,7 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): logging.debug(\ 'case results or results/result or results/resulturl') - + if (key == 'results'): # # { if results @@ -2359,7 +2354,7 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): if self.debug: logging.debug('') logging.debug('key= results') - + if (phase.lower() == 'completed'): # # { if reulsts case and phase = completed @@ -2378,35 +2373,35 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): # logging.debug(result) #results = job['uws:results'] - + #if self.debug: # logging.debug('') # logging.debug(f'results: {results:s}') - + #outstr = results - except Exception as e: - + except Exception: + if self.debug: logging.debug('') logging.debug('error retrieving results') - + self.__printError__(self.format, \ 'Failed to extract results', errcode='400') - + if self.debug: logging.debug('') logging.debug('results:') logging.debug(results) logging.debug('result:') logging.debug(result) - + self.__printStatus__(key, results, 'xml') sys.exit() - + + # + # } end results case # - # } end results case - # else: # # { if reulsts case but phase NOT completed @@ -2419,13 +2414,13 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): # # } end if results # - else: + else: # # { if results/resulturl or 'results/result: # if (phase.lower() == 'completed'): # - # { if reulsts/resulturl or results/result cases + # { if reulsts/resulturl or results/result cases # and phase = completed # try: @@ -2433,12 +2428,12 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): resulturl = \ job['uws:results']['uws:result']['@xlink:href'] - except Exception as e: - + except Exception: + if self.debug: logging.debug('') logging.debug('error retrieving result') - + self.__printError__ (self.format, \ 'Error retrieving result or resulturl') @@ -2452,7 +2447,7 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): self.__printStatus__(key, resulturl, 'plain') sys.exit() - + elif (key == 'results/result'): indx = resulturl.find(workspace) @@ -2466,7 +2461,7 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): fp = None try: fp = open(resultpath, 'r') - except Exception as e: + except Exception: msg = 'Failed to open result file: ' + resultpath self.__printError__(format, msg, errcode='400') @@ -2496,31 +2491,31 @@ def __getStatus__(self, workdir, workspace, key, param, **kwargs): fp.close() sys.exit() - + # - # } end results/resulturl or results/result cases + # } end results/resulturl or results/result cases # and phase completed - # - + # + else: # - # { if reulsts/resulturl or results/result cases + # { if reulsts/resulturl or results/result cases # and phase NOT completed # self.__printError__ (self.format, \ 'No result or resulturl because phase is not COMPLETED') - + # - # } end results/resulturl or results/result cases + # } end results/resulturl or results/result cases # and phase NOT completed - # - + # + # # } if results/resulturl or 'results/result: # # - # } end results or results/resulturl or results/result cases - # + # } end results or results/resulturl or results/result cases + # else: msg = f'key {key:s} is not a valid key' @@ -2535,8 +2530,8 @@ def __printError__(self, fmt, errmsg, **kwargs): # # { # - - errcode = '200' + + errcode = '200' if ('errcode' in kwargs): errcode = kwargs['errcode'] @@ -2562,7 +2557,7 @@ def __printError__(self, fmt, errmsg, **kwargs): if(self.infomsg[-1] == '?'): print(self.infomsg + 'dbtable=' + html.escape(self.dbtable)) else: - print(self.infomsg) + print(self.infomsg) print('') print('') @@ -2577,8 +2572,8 @@ def __printError__(self, fmt, errmsg, **kwargs): if(self.infomsg[-1] == '?'): print(self.infomsg + 'dbtable=' + self.dbtable) else: - print(self.infomsg) - + print(self.infomsg) + sys.stdout.flush() sys.exit() @@ -2603,22 +2598,22 @@ def __writeAsyncError__(self, errmsg, statuspath, statdict, param, **kwargs): # # Set status parameters # - + if self.debug: logging.debug('') - logging.debug(f'From writeAsyncError') - logging.debug(f'statdict["stime"]:') + logging.debug('From writeAsyncError') + logging.debug('statdict["stime"]:') logging.debug(statdict['stime']) - + if (statdict['stime'] is not None): - + etime = datetime.datetime.now() endtime = etime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4] durationtime = etime - statdict['stime'] durationstr = str(durationtime.total_seconds())[:10] duration_dbl = float(durationstr) - + duration = math.ceil(duration_dbl) statdict['endtime'] = endtime @@ -2627,8 +2622,8 @@ def __writeAsyncError__(self, errmsg, statuspath, statdict, param, **kwargs): else: statdict['starttime'] = '' statdict['endtime'] = '' - statdict['duration'] = 0 - + statdict['duration'] = 0 + statdict['phase'] = 'ERROR' statdict['errmsg'] = errmsg @@ -2639,8 +2634,8 @@ def __writeAsyncError__(self, errmsg, statuspath, statdict, param, **kwargs): # # } end of writeAsyncError # - - + + def __printSyncResult__(self, resultpath, format, **kwargs): @@ -2758,9 +2753,9 @@ def __writeStatusMsg__(self, statuspath, statdict, param, **kwargs): if self.debug: logging.debug('') - logging.debug(f'Enter writeStatusMsg') + logging.debug('Enter writeStatusMsg') logging.debug(f'phase= {statdict["phase"]:s}') - + # # { TAP status result always written in XML format # input maxrecstr is stored in self.maxrecstr @@ -2769,7 +2764,7 @@ def __writeStatusMsg__(self, statuspath, statdict, param, **kwargs): format = param['format'].lower() maxrecstr = '' - + if (len(self.maxrecstr) == 0): maxrecstr = str(param['maxrec']) else: @@ -2779,13 +2774,13 @@ def __writeStatusMsg__(self, statuspath, statdict, param, **kwargs): logging.debug('') logging.debug(f'maxrecstr= {maxrecstr:s}') logging.debug(f"maxrec= {param['maxrec']:d}") - + fp = None try: fp = open(statuspath, 'w+') os.chmod(statuspath, 0o664) - except Exception as e: + except Exception: msg = 'Failed to open/create status file.' self.__printError__(format, msg) @@ -2815,7 +2810,7 @@ def __writeStatusMsg__(self, statuspath, statdict, param, **kwargs): errmsg = statdict['errmsg'] fp.write('\n') - + fp.write (self.uwsheader) fp.write ('\n') @@ -2861,7 +2856,7 @@ def __writeStatusMsg__(self, statuspath, statdict, param, **kwargs): if self.debug: logging.debug('') - logging.debug(f'wrote job status file') + logging.debug('wrote job status file') # @@ -2894,7 +2889,7 @@ def __writeStatusMsg__(self, statuspath, statdict, param, **kwargs): if self.debug: logging.debug('') - logging.debug(f'done writeAsyncError') + logging.debug('done writeAsyncError') # # Note: closing file automatically released the lock @@ -2911,7 +2906,7 @@ def __getDatalevel__(self, dbtable, **kwargs): # # { getDataLevel # - + if self.debug: logging.debug('') logging.debug('Enter __getDatalevel__') @@ -2950,12 +2945,12 @@ def __getDatalevel__(self, dbtable, **kwargs): # } end getDataLevel # - + def __printVosiTables__ (self, connectInfo, vosipath, **kwargs): # # { printVosiTables # - + if self.debug: logging.debug('') logging.debug('Enter __printVosiTables__') @@ -2979,13 +2974,13 @@ def __printVosiTables__ (self, connectInfo, vosipath, **kwargs): logging.debug('') logging.debug('call vosiTables') - vositbl = vosiTables (dbms=dbms, \ + vosiTables (dbms=dbms, \ dbserver=dbserver, \ userid=userid, \ password=password, \ outpath=vosipath, \ debug=1) - + if self.debug: logging.debug('') logging.debug('returned vosiTables') @@ -2997,7 +2992,7 @@ def __printVosiTables__ (self, connectInfo, vosipath, **kwargs): logging.debug('VosiTables exception:') logging.debug(f'exception: {str(e):s}') - self.__printError__ ('votable', str(e)) + self.__printError__ ('votable', str(e)) # # print out vosi table in the workspace @@ -3006,7 +3001,7 @@ def __printVosiTables__ (self, connectInfo, vosipath, **kwargs): fp = None try: fp = open(vosipath, 'r') - except Exception as e: + except Exception: msg = 'Failed to open vositable path: ' + vosipath self.__printError__('votable', msg, errcode='400') @@ -3037,7 +3032,7 @@ def __printVosiTables__ (self, connectInfo, vosipath, **kwargs): def __printVosiAvailability__ (self, **kwargs): - + # # { printVosiAvailability # @@ -3045,8 +3040,8 @@ def __printVosiAvailability__ (self, **kwargs): print ('') print ('') print ('') print (' true') print (' TAP service available.') @@ -3085,7 +3080,7 @@ def __printVosiCapability__ (self, **kwargs): print (' ') val = ' ' + self.httpurl + '/TAP' print (val) - + print (' ') print ('') print (' ') @@ -3137,10 +3132,10 @@ def __printVosiCapability__ (self, **kwargs): print (' ') print (' ') print (' ') - + val = ' ' + self.httpurl + '/TAP/tables' print (val) - + print (' GET') print (' application/xml') print (' ') @@ -3149,31 +3144,31 @@ def __printVosiCapability__ (self, **kwargs): print (' ') print (' ') print (' ') - + val = ' ' + self.httpurl + '/TAP/capabilities' print (val) - + print (' ') print (' ') print ('') print (' ') print (' ') print (' ') - + val = ' ' + self.httpurl + '/TAP/availability' print (val) - + print (' ') print (' ') print ('') - print ('') - + print ('') + sys.exit() - + # # } end printVosiCapability # - + # # } end tap class # diff --git a/TAP/tapquery.py b/TAP/tapquery.py index cf543ea..47bf0db 100644 --- a/TAP/tapquery.py +++ b/TAP/tapquery.py @@ -2,25 +2,20 @@ # This code is released with a BSD 3-clause license. License information is at # https://github.com/Caltech-IPAC/nexsciTAP/blob/master/LICENSE -import sys -import os -import logging - -import datetime - import argparse -import configobj - -from TAP.datadictionary import dataDictionary -from TAP.writeresult import writeResult -from TAP.tablenames import TableNames -from TAP.tablevalidator import TableValidator, TableValidationError -from TAP.configparam import configParam +import logging +import os +import sys from ADQL.adql import ADQL - from spatial_index import SpatialIndex +from TAP.configparam import configParam +from TAP.datadictionary import dataDictionary +from TAP.tablenames import TableNames +from TAP.tablevalidator import TableValidationError, TableValidator +from TAP.writeresult import writeResult + class tapQuery: @@ -80,8 +75,8 @@ def __init__(self, **kwargs): conn: We will need the above connectInfo for it's formatting info but we may have already made the DBMS connection elsewhere (for more involved - processing scenarios). In that case we can - optionally pass in the connection itself + processing scenarios). In that case we can + optionally pass in the connection itself instead of creating it here. query(char): the sql query to be executed, workdir(char): user work directory @@ -101,8 +96,8 @@ def __init__(self, **kwargs): workdir=userworkdir, filename=filename, maxrec=maxrec, - ddtbl=ddtbl, - ddtbl=ddfile, + ddtbl=ddtbl, + ddtbl=ddfile, format=format, racol=racol, deccol=deccol) @@ -120,7 +115,7 @@ def __init__(self, **kwargs): self.ddfile = kwargs['ddfile'] if self.debug: - logging.debug(f'Enter tapQuery') + logging.debug('Enter tapQuery') logging.debug(f'self.debug = {self.debug:d}') @@ -149,15 +144,15 @@ def __init__(self, **kwargs): # Otherwise collect the info for making the connection and make it. else: - + # Collect DBMS-specific keyword parameters # # There are two use modes for getting these parameters. Most of the time we let this code do everything; # getting the connection paramters and making the actual connection ('conn' parameter) to whichever DBMS # we are using. But sometimes we have a more complicated scenario (like using and populating temporary - # tables as part of the processing). The we will have run tapUtil() to do this same setup and used the + # tables as part of the processing). The we will have run tapUtil() to do this same setup and used the # database connection for these other steps before we get here. The all of the code here and the section - # below where we connect to the database will have already been executed there. + # below where we connect to the database will have already been executed there. # ORACLE @@ -166,7 +161,7 @@ def __init__(self, **kwargs): import cx_Oracle - self.dbserver = None + self.dbserver = None if('dbserver' in self.connectInfo): self.dbserver = self.connectInfo['dbserver'] @@ -176,7 +171,7 @@ def __init__(self, **kwargs): self.status = 'error' raise Exception(self.msg) - self.userid = None + self.userid = None if ('userid' in self.connectInfo): self.userid = self.connectInfo['userid'] @@ -208,7 +203,7 @@ def __init__(self, **kwargs): import sqlite3 - self.db = None + self.db = None if ('db' in self.connectInfo): self.db = self.connectInfo['db'] @@ -218,7 +213,7 @@ def __init__(self, **kwargs): self.status = 'error' raise Exception(self.msg) - self.tap_schema = None + self.tap_schema = None if('tap_schema' in self.connectInfo): self.tap_schema = self.connectInfo['tap_schema'] @@ -232,20 +227,20 @@ def __init__(self, **kwargs): logging.debug(f'db= {self.db:s}') logging.debug(f'tap_schema= {self.tap_schema:s}') - + # MYSQL elif self.dbms.lower() == 'mysql': import mysql.connector - - self.dbserver = None + + self.dbserver = None self.port = 3306 self.socket = None self.db = None self.userid = None self.password = None - + if ('dbserver' in self.connectInfo): self.dbserver = self.connectInfo['dbserver'] @@ -260,10 +255,10 @@ def __init__(self, **kwargs): if ((self.dbserver is None) and \ (self.socket is None)): - + self.msg = 'Failed to retrieve required input DB server ' \ 'parameter [dbserver] or [socket]' - + self.status = 'error' raise Exception(self.msg) @@ -306,7 +301,7 @@ def __init__(self, **kwargs): import psycopg2 - self.hostname = None + self.hostname = None if('hostname' in self.connectInfo): self.hostname = self.connectInfo['hostname'] @@ -316,7 +311,7 @@ def __init__(self, **kwargs): self.status = 'error' raise Exception(self.msg) - self.database = None + self.database = None if('database' in self.connectInfo): self.database = self.connectInfo['database'] @@ -326,7 +321,7 @@ def __init__(self, **kwargs): self.status = 'error' raise Exception(self.msg) - self.username = None + self.username = None if('username' in self.connectInfo): self.username = self.connectInfo['username'] @@ -336,7 +331,7 @@ def __init__(self, **kwargs): self.status = 'error' raise Exception(self.msg) - self.password = None + self.password = None if('password' in self.connectInfo): self.password = self.connectInfo['password'] @@ -379,14 +374,14 @@ def __init__(self, **kwargs): if self.debug: logging.debug('connected to Oracle, DB ' + self.dbserver) - except Exception as e: + except Exception: self.status = 'error' self.msg = 'Failed to connect to cx_Oracle' raise Exception(self.msg) - + # SQLITE3 elif self.dbms.lower() == 'sqlite3': @@ -411,7 +406,7 @@ def __init__(self, **kwargs): if self.debug: logging.debug('TAP_SCHEMA attached') - except Exception as e: + except Exception: self.status = 'error' self.msg = 'Failed to connect to SQLite3 databases' @@ -422,7 +417,7 @@ def __init__(self, **kwargs): # MYSQL elif self.dbms.lower() == 'mysql': - + try: if (self.dbserver is not None): @@ -433,7 +428,7 @@ def __init__(self, **kwargs): port=self.port, \ db=self.db ) - + elif (self.socket is not None): self.conn = mysql.connector.connect ( @@ -442,7 +437,7 @@ def __init__(self, **kwargs): unix_socket=self.socket, \ db=self.db ) - + else: self.status = 'error' self.msg = 'Failed to connect to mysql databases' @@ -451,7 +446,7 @@ def __init__(self, **kwargs): if self.debug: logging.debug('mysql connected') - except Exception as e: + except Exception: self.status = 'error' self.msg = 'Failed to connect to mysql databases' @@ -460,7 +455,7 @@ def __init__(self, **kwargs): if self.debug: logging.debug('here0') - + # POSTGRESQL @@ -485,10 +480,10 @@ def __init__(self, **kwargs): raise Exception(self.msg) - + # Get the query and query processing parameters (format, workspace, coordinate columns, etc.) - self.sql = None + self.sql = None if('query' in kwargs): self.sql = kwargs['query'] @@ -514,7 +509,7 @@ def __init__(self, **kwargs): if('filename' in kwargs): logging.debug(f'filename= {self.filename:s}') else: - logging.debug(f'filename= None') + logging.debug('filename= None') logging.debug(f'sql= {self.sql:s}') @@ -540,7 +535,7 @@ def __init__(self, **kwargs): try: self.maxrec = int(maxrecstr) - except Exception as e: + except Exception: self.msg = "Failed to convert input maxrec value [" + \ maxrecstr + "] to integer." @@ -553,7 +548,7 @@ def __init__(self, **kwargs): # Extract DB table name from query - self.dbtable = None + self.dbtable = None tn = TableNames() tables = tn.extract_tables(self.sql) @@ -626,12 +621,12 @@ def __init__(self, **kwargs): if self.debug: logging.debug('DD successfully retrieved.') - except Exception as e: + except Exception: if self.debug: logging.debug('dataDictionary retrieval failure.') - self.msg = f'dataDictionary retrieval failure.' + self.msg = 'dataDictionary retrieval failure.' raise Exception(self.msg) @@ -643,19 +638,19 @@ def __init__(self, **kwargs): # cursor = self.conn.cursor() - + if self.debug: logging.debug(f'sql = {self.sql:s}') logging.debug('call execut sql') self.msg = self.__executeSql__(cursor, self.sql, debug=1) - + if self.debug: logging.debug('returned executeSql') if len(self.msg) > 0: raise Exception(self.msg) - + # # Call writeResult @@ -703,10 +698,9 @@ def __executeSql__(self, cursor, sql, **kwargs): # { # - debug = 0 if('debug' in kwargs): - debug = kwargs['debug'] + kwargs['debug'] if self.debug: logging.debug('Enter executeSql') @@ -725,7 +719,7 @@ def __executeSql__(self, cursor, sql, **kwargs): try: cursor.execute(sql) - + except Exception as e: return_message = str(e).replace('"', "'") @@ -734,10 +728,10 @@ def __executeSql__(self, cursor, sql, **kwargs): try: self.conn.rollback() - - except Exception as e: + + except Exception: pass - + return return_message # diff --git a/TAP/taputil.py b/TAP/taputil.py index accc497..af4c19f 100644 --- a/TAP/taputil.py +++ b/TAP/taputil.py @@ -2,17 +2,15 @@ # This code is released with a BSD 3-clause license. License information is at # https://github.com/Caltech-IPAC/nexsciTAP/blob/master/LICENSE -import sys -import os import logging +import os +import sys -import pprint - -import configobj +from ADQL.adql import ADQL +from spatial_index import SpatialIndex from TAP.configparam import configParam -from ADQL.adql import ADQL -from spatial_index import SpatialIndex + class tapUtil: @@ -20,7 +18,7 @@ def __init__(self, **kwargs): self.conn = None - pid = os.getpid() + os.getpid() sys.tracebacklimit = 0 @@ -102,7 +100,7 @@ def __init__(self, **kwargs): socket = None if ('socket' in connectInfo): socket = connectInfo['socket'] - + port = None if ('port' in connectInfo): port = connectInfo['port'] @@ -174,7 +172,7 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('connected to Oracle, DB ' + dbserver) - except Exception as e: + except Exception: msg = 'Failed to connect to cx_Oracle' raise Exception(msg) @@ -234,7 +232,7 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('TAP_SCHEMA attached') - except Exception as e: + except Exception: msg = 'Failed to connect to SQLite3 databases' raise Exception(msg) @@ -242,9 +240,8 @@ def __init__(self, **kwargs): # MYSQL elif (dbms.lower() == 'mysql'): - - import msql.connector - + + try: if (dbserver is not None): @@ -255,7 +252,7 @@ def __init__(self, **kwargs): port=port, \ db=db ) - + elif (socket is not None): self.conn = mysql.connector.connect ( @@ -264,7 +261,7 @@ def __init__(self, **kwargs): unix_socket=socket, \ db=db ) - + else: msg = 'Failed to connect to mysql databases' raise Exception(msg) @@ -273,14 +270,14 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('mysql connected') - except Exception as e: + except Exception: msg = 'Failed to connect to mysql databases' raise Exception(msg) if debug: logging.debug('') logging.debug('here0') - + else: msg = 'Invalid DBMS' raise Exception(msg) diff --git a/TAP/vositables.py b/TAP/vositables.py index 206bd69..58cba34 100644 --- a/TAP/vositables.py +++ b/TAP/vositables.py @@ -3,13 +3,10 @@ # https://github.com/Caltech-IPAC/nexsciTAP/blob/master/LICENSE -import sys -import os -import logging - -import datetime - import argparse +import logging +import os +import sys class vosiTables: @@ -27,7 +24,7 @@ class vosiTables: returnMsg = "" arraysize = 10000 - + # # DD columns # @@ -52,28 +49,28 @@ class vosiTables: coldesc = 0 nschema = 0 - + schema_namearr = [] schema_descarr = [] - - + + def __init__(self, **kwargs): - + # # { vosiTables.init method # """ - vosiTables class generates the VOSI compatible XML outputs for TAP - query with endpoint TAP/tables + vosiTables class generates the VOSI compatible XML outputs for TAP + query with endpoint TAP/tables Required keyword input parameters: - connectInfo (python dictionary): - a python dictionary containing info needed to make - a "connection" to a DBMS. - - ourpath(char): output table file path + connectInfo (python dictionary): + a python dictionary containing info needed to make + a "connection" to a DBMS. + + ourpath(char): output table file path Usage: @@ -86,13 +83,13 @@ def __init__(self, **kwargs): debug=debug) """ - self.dbms = '' + self.dbms = '' self.dbserver = '' self.userid = '' self.password = '' self.outpath = '' - self.debug = 0 - + self.debug = 0 + self.db = '' self.tap_schema = '' @@ -127,19 +124,19 @@ def __init__(self, **kwargs): # # Get keyword parameters # - + if ((self.dbms is None) or (len(self.dbms) == 0)): self.msg = 'Database name missing.' self.status = 'error' #self.__printVosiError__ (self.msg) - + raise Exception (self.msg) if self.debug: logging.debug('') logging.debug(f'dbms= {self.dbms:s}') - - + + if (self.dbms.lower() == 'oracle'): if ((self.dbserver is None) or (len(self.dbserver) == 0)): @@ -148,33 +145,33 @@ def __init__(self, **kwargs): #self.__printVosiError__ (self.msg) raise Exception (self.msg) - + if self.debug: logging.debug('') logging.debug(f'dbserver= {self.dbserver:s}') - + if ((self.userid is None) or (len(self.userid) == 0)): self.msg = 'Database userid missing.' self.status = 'error' #self.__printVosiError__ (self.msg) raise Exception (self.msg) - + if self.debug: logging.debug('') logging.debug(f'userid= {self.userid:s}') - + if ((self.password is None) or (len(self.password) == 0)): self.msg = 'Database password missing.' self.status = 'error' #self.__printVosiError__ (self.msg) raise Exception (self.msg) - + if self.debug: logging.debug('') logging.debug(f'password= {self.password:s}') - + elif (self.dbms.lower() == 'sqlite3'): if ((self.db is None) or (len(self.db) == 0)): @@ -183,30 +180,30 @@ def __init__(self, **kwargs): #self.__printVosiError__ (self.msg) raise Exception (self.msg) - + if self.debug: logging.debug('') logging.debug(f'db= {self.db:s}') - + if ((self.tap_schema is None) or (len(self.tap_schema) == 0)): - + self.msg = 'Database table name missing.' self.status = 'error' #self.__printVosiError__ (self.msg) raise Exception (self.msg) - + if self.debug: logging.debug('') logging.debug(f'tap_schema= {self.tap_schema:s}') - + if ((len(self.outpath) == 0) or (self.outpath is None)): self.msg = 'Required output path is missing.' #self.__printVosiError__ (self.msg) raise Exception (self.msg) - + if self.debug: logging.debug('') logging.debug(f'outpath= {self.outpath:s}') @@ -234,7 +231,7 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('connected to Oracle, DB ' + self.dbserver) - except Exception as e: + except Exception: self.status = 'error' self.msg = 'Failed to connect to cx_Oracle' @@ -301,7 +298,7 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('TAP_SCHEMA attached') - except Exception as e: + except Exception: self.status = 'error' self.msg = 'Failed to connect to SQLite3 databases' @@ -319,10 +316,10 @@ def __init__(self, **kwargs): # # } end Connect to DBMS # - + # # start loop for constructing output: - # + # # -- vosi header: tableset tag, # -- query for schemas: schema tag, # -- for each schema, query tables: table tag, @@ -333,25 +330,25 @@ def __init__(self, **kwargs): # -- end table tag, # -- end schema tag # -- end tableset tag - + # # open ouput file in workdir for vosi table output # #vositblpath = self.workdir + '/vositable.xml' vositblpath = self.outpath - + if self.debug: logging.debug('') logging.debug(f'vositblpath = {vositblpath:s}') - + fp = None try: fp = open (vositblpath, 'w') os.chmod(vositblpath, 0o664) except Exception as e: - + if self.debug: logging.debug('') logging.debug(f'open voditblpath exception: {str(e):s}') @@ -392,7 +389,7 @@ def __init__(self, **kwargs): if self.debug: logging.debug('') logging.debug(f'executeSql exception: {str(e):s}') - + #self.__printVosiError__ (str(e)) raise Exception (str(e)) @@ -409,7 +406,7 @@ def __init__(self, **kwargs): self.__getSchemaQueryArr__ (cursor, \ debug=self.debug) - + if self.debug: logging.debug('') logging.debug('returned getSchemaDesc') @@ -423,9 +420,9 @@ def __init__(self, **kwargs): #self.__printVosiError__ (str(e)) raise Exception (str(e)) - + self.nschema = len (self.schema_namearr) - + if self.debug: logging.debug('') logging.debug(f'nschema= {self.nschema:d}') @@ -436,7 +433,7 @@ def __init__(self, **kwargs): logging.debug(f'{i:d} schema_name={self.schema_namearr[i]:s}') logging.debug(f'schema_description={self.schema_descarr[i]:s}') - for ischema in range (0, self.nschema): + for ischema in range (0, self.nschema): # # { ischema loop # @@ -448,9 +445,9 @@ def __init__(self, **kwargs): self.schema_namearr[ischema] + "\n") fp.write(" " + \ self.schema_descarr[ischema] + "\n") - fp.flush() - - + fp.flush() + + # # 2. execute table sql # @@ -476,7 +473,7 @@ def __init__(self, **kwargs): if self.debug: logging.debug('') logging.debug(f'executeSql exception: {str(e):s}') - + #self.__printVosiError__ (str(e)) raise Exception (str(e)) @@ -487,15 +484,15 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('call getTableQueryArr') - # - # 3. getSqlresult of table sql + # + # 3. getSqlresult of table sql # re-initialize arrays # table_namearr = [] table_descarr = [] table_typearr = [] - + ntable = 0 try: ntable = self.__getTableQueryArr__(cursor, table_namearr, \ @@ -506,7 +503,7 @@ def __init__(self, **kwargs): if self.debug: logging.debug('') logging.debug(f'getTableQueryArr exception: {str(e):s}') - + #self.__printVosiError__ (str(e)) raise Exception (str(e)) @@ -515,9 +512,9 @@ def __init__(self, **kwargs): logging.debug('') logging.debug('returned getTableQueryArr') logging.debug(f'ntable= {ntable:d}') - - - for itable in range (0, ntable): + + + for itable in range (0, ntable): logging.debug ('') logging.debug (f'itable= {itable:d}') logging.debug ( @@ -526,15 +523,15 @@ def __init__(self, **kwargs): f'table_desc= {table_descarr[itable]:s}') logging.debug ( f'table_type= {table_typearr[itable]:s}') - + # # for each table: # - for itable in range (0, ntable): + for itable in range (0, ntable): # # { itable loop - # + # # 1. write header block for each table, # fp.write(" \n") - fp.flush() - + fp.flush() + # # } end itable loop # - + # # 2. write end block for each schema # fp.write(" \n") - fp.flush() + fp.flush() # # } end ischema loop and write end block for xml file # fp.write("\n") - fp.flush() - + fp.flush() + # # } end vosiTables.init # @@ -702,7 +699,7 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ # table_name, description, and table_type; # debug = 0 - + if('debug' in kwargs): debug = kwargs['debug'] @@ -710,7 +707,6 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ logging.debug('') logging.debug('Enter getTableQueryArr') - size = 0 table_colname = [] table_dbtype = [] @@ -752,7 +748,6 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ dbtype = '' - size = None dbdatatypestr = str(col[1]) @@ -813,7 +808,7 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ logging.debug(f'nrec = {nrec:d}') logging.debug('') - + for ll in range(0, nrec): # @@ -821,7 +816,7 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ # row = rows[ll] - + if debug: logging.debug ('') logging.debug (f'len(row)= {len(row):d}') @@ -845,13 +840,13 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ if debug: logging.debug (f'i= {i:d}, data= {data:s}') logging.debug (f'table_colname= {table_colname[i]:s}') - + if (table_colname[i].lower() == 'table_name'): table_namearr.append (data) - + elif (table_colname[i].lower() == 'description'): table_descarr.append (data) - + elif (table_colname[i].lower() == 'table_type'): table_typearr.append (data) @@ -863,7 +858,7 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ # # } end of ll loop - # + # if debug: logging.debug('') logging.debug(table_namearr) @@ -871,7 +866,7 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ logging.debug(table_typearr) logging.debug('----------------------------------------') - + ntot = ntot + nrec if(len(rows) < cursor.arraysize): @@ -891,11 +886,11 @@ def __getTableQueryArr__ (self, cursor, table_namearr, table_descarr, \ # - + def __getSchemaQueryArr__ (self, cursor, **kwargs): # # { vosiTables.getSchemaQueryArr - # + # # this query contains only two columns: # schema_name and schema_description # @@ -906,12 +901,12 @@ def __getSchemaQueryArr__ (self, cursor, **kwargs): if self.debug: logging.debug('') - logging.debug(f'Enter __getSchemaQueryArr__\n') + logging.debug('Enter __getSchemaQueryArr__\n') size = 0 nfetch = self.arraysize cursor.arraysize = nfetch - + if debug: logging.debug('') logging.debug(f'nfetch= {nfetch:d}') @@ -938,7 +933,7 @@ def __getSchemaQueryArr__ (self, cursor, **kwargs): # colname = str(col[0]).lower() - + if debug: logging.debug('') logging.debug(f'colname(lower) = {colname:s}') @@ -951,8 +946,6 @@ def __getSchemaQueryArr__ (self, cursor, **kwargs): dbdatatype = '' size = None - precision = None - scale = None dbdatatypestr = str(col[1]) @@ -1079,7 +1072,7 @@ def __getSchemaQueryArr__ (self, cursor, **kwargs): # row = rows[ll] - + if debug: logging.debug(row) logging.debug(f'row[0]= {row[0]:s}') @@ -1088,7 +1081,7 @@ def __getSchemaQueryArr__ (self, cursor, **kwargs): self.schema_namearr.append(row[0]) self.schema_descarr.append(row[1]) - + if debug: logging.debug(row) logging.debug(self.schema_namearr) @@ -1167,7 +1160,6 @@ def __writeOneTableResult__ (self, cursor, fp, **kwargs): # dbdatatype = '' - size = None dbdatatypestr = str(col[1]) @@ -1191,7 +1183,7 @@ def __writeOneTableResult__ (self, cursor, fp, **kwargs): # # } end extract dbdatatype from col_array: col[1] # - + i = i + 1 # @@ -1234,13 +1226,13 @@ def __writeOneTableResult__ (self, cursor, fp, **kwargs): # row = rows[ll] - data = '' + data = '' fp.write(" \n") - + col_name = '' for i in range(0, len(row)): - + if (row[i] is None): data = 'None' elif (dbtype[i] == 'STRING'): @@ -1260,65 +1252,65 @@ def __writeOneTableResult__ (self, cursor, fp, **kwargs): if self.debug: logging.debug('') logging.debug(f'll= {ll:d} col_name= {col_name:s} data= {data:s}') - + if (colnamearr[i] == 'column_name'): fp.write(" " + data + "\n") - + elif (colnamearr[i] == 'description'): fp.write(" " + \ "\n") - + elif (colnamearr[i] == 'datatype'): fp.write (" " + \ "" + \ data + "\n") - + elif (colnamearr[i] == 'arraysize'): if (data != 'None'): fp.write (" " + data + \ "\n") - + elif (colnamearr[i] == 'xtype'): if (data != 'None'): fp.write (" " + data + \ "\n") - + elif (colnamearr[i] == 'utype'): if (data != 'None'): fp.write(" " + data + \ "\n") - + elif (colnamearr[i] == 'unit'): if (data != 'None'): fp.write(" " + data + \ "\n") - + elif (colnamearr[i] == 'ucd'): if (data != 'None'): fp.write (" " + data + \ "\n") - + elif (colnamearr[i] == 'indexed'): if (data != 'None'): fp.write (" " + data + \ "\n") - + elif (colnamearr[i] == 'principal'): if (data != 'None' and data != 0 and data != '0'): fp.write(" " + \ "" + data + "\n") - + elif (colnamearr[i] == 'std'): if (data != 'None'): fp.write (" " + data + \ "\n") - + elif (colnamearr[i] == 'column_index'): if (data != 'None' and data != 0 and data != '0'): fp.write(" " + "" + data + \ "\n") - + irow = irow + 1 #if col_datatype == 'timestamp': @@ -1327,7 +1319,7 @@ def __writeOneTableResult__ (self, cursor, fp, **kwargs): # col_datatype = 'char' fp.write(" \n") - + # # } end of ll loop @@ -1336,7 +1328,7 @@ def __writeOneTableResult__ (self, cursor, fp, **kwargs): ntot = ntot + nrec if(len(rows) < cursor.arraysize): - break + break if self.debug: logging.debug('') @@ -1354,12 +1346,12 @@ def __writeOneTableResult__ (self, cursor, fp, **kwargs): def __writeForeignKey__ (self, cursor, fp, **kwargs): - + # # {vosiTables.writeForeignKey # debug = 0 - + if('debug' in kwargs): debug = kwargs['debug'] @@ -1404,7 +1396,6 @@ def __writeForeignKey__ (self, cursor, fp, **kwargs): # dbdatatype = '' - size = None dbdatatypestr = str(col[1]) @@ -1427,7 +1418,7 @@ def __writeForeignKey__ (self, cursor, fp, **kwargs): # # } end extract dbdatatype from col_array: col[1] # - + i = i + 1 # @@ -1466,16 +1457,15 @@ def __writeForeignKey__ (self, cursor, fp, **kwargs): # row = rows[ll] - - keyid = '' + targettbl = '' fromcol = '' targetcol= '' desc = '' - data = '' + data = '' for i in range(0, len(row)): - + if (row[i] is None): data = 'None' elif (dbtype[i] == 'STRING'): @@ -1488,7 +1478,7 @@ def __writeForeignKey__ (self, cursor, fp, **kwargs): logging.debug(f'i= {i:d}, data= {data:s}') if (i == 0): - keyid = data + pass elif (i == 1): targettbl = data elif (i == 2): @@ -1499,14 +1489,14 @@ def __writeForeignKey__ (self, cursor, fp, **kwargs): desc = data irow = irow + 1 - + # # for row, write column block # fp.write(" \n") fp.write(" " + targettbl + \ "\n") - + fp.write(" \n") fp.write(" " + fromcol + \ "\n") @@ -1525,7 +1515,7 @@ def __writeForeignKey__ (self, cursor, fp, **kwargs): ntot = ntot + nrec if(len(rows) < cursor.arraysize): - break + break if debug: logging.debug('') @@ -1585,7 +1575,7 @@ def __printVosiError__(self, errmsg): httphdr = "HTTP/1.1 500 ERROR\r" #print("HTTP/1.1 200 OK\r") - + print(httphdr) print("Content-type: text/xml\r") @@ -1628,19 +1618,19 @@ def main(): parser.add_argument('--dbserver', required=False, help='Oracle database server (dbserver) to use.') - + parser.add_argument('--userid', required=False, help='Oracle dbserver userid (userid) to use.') - + parser.add_argument('--password', required=False, help='Oracle dbserver (password) to use.') - + parser.add_argument('--db', required=False, help='Sqlite database name to use.') - + parser.add_argument('--tap_schema', required=False, help='Sqlite database table name to use.') - + parser.add_argument('--outpath', required=True, help='outpath for the result.') @@ -1662,47 +1652,47 @@ def main(): if (args.dbms is not None): dbms = args.dbms print (f'dbms= {dbms:s}') - + if (args.dbserver is not None): dbserver = args.dbserver print (f'dbserver= {dbserver:s}') - + if (args.userid is not None): userid = args.userid print (f'userid= {userid:s}') - - + + if (args.password is not None): password = args.password print (f'password= {password:s}') - - + + if (args.outpath is not None): outpath = args.outpath print (f'outpath= {outpath:s}') - - + + if (args.debug is not None): debug = args.debug print (f'debug= {debug:s}') - + if (args.db is not None): db = args.db print (f'db= {db:s}') - + if (args.tap_schema is not None): tap_schema = args.tap_schema print (f'tap_schema= {tap_schema:s}') - - + + if debug: debugfname = './vosi.debug' logging.basicConfig(filename=debugfname, \ level=logging.DEBUG) - print (f'debug turned on') - + print ('debug turned on') + logging.debug('') logging.debug('debug turned on') logging.debug(f'dbms= {dbms:s}') @@ -1713,13 +1703,12 @@ def main(): logging.debug(f'debug= {debug:s}') - vositbl = None try: if debug: logging.debug('') logging.debug('call vosiTables') - - vositbl = vosiTables (dbms=dbms, \ + + vosiTables (dbms=dbms, \ dbserver=dbserver, \ userid=userid, \ password=password, \ diff --git a/TAP/writeresult.py b/TAP/writeresult.py index 3489556..a7a24e1 100644 --- a/TAP/writeresult.py +++ b/TAP/writeresult.py @@ -13,10 +13,9 @@ # import logging -import datetime - from TAP import writerecs + class writeResult: time00 = None @@ -70,7 +69,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): """ cursor: oracle query returned cursor containing data stream, workdir: work directory for output files, - filename: output file name. + filename: output file name. dd: data dictionary for the output columns, coldes(0/1): indictes whether to include column descriptions in the output file header @@ -103,8 +102,8 @@ def __init__(self, cursor, workdir, dd, **kwargs): logging.debug('') logging.debug('Enter writeResult:') logging.debug(f'self.debug= {self.debug:d}') - - + + self.cursor = cursor self.workdir = workdir self.dd = dd @@ -116,7 +115,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug('') logging.debug(f'ncols_dd= {self.ncols_dd:d}') - + if('racol' in kwargs): self.racol = kwargs['racol'] @@ -125,7 +124,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): if('deccol' in kwargs): self.deccol = kwargs['deccol'] - + if (self.dd != None): self.ind_deccol = self.__getDDIndex__(self.dd, self.deccol) @@ -134,12 +133,12 @@ def __init__(self, cursor, workdir, dd, **kwargs): if('dbms' in kwargs): self.dbms = kwargs['dbms'] - + if self.debug: logging.debug('') logging.debug(f'dbms= {self.dbms:s}') - - + + if('format' in kwargs): self.format = kwargs['format'] @@ -149,7 +148,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug('') logging.debug('here0-2') - + self.coldesc = 0 if('coldesc' in kwargs): self.coldesc = kwargs['coldesc'] @@ -172,7 +171,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): logging.debug(f' arraysize = {self.arraysize:d}') self.status = '' - + # # open querypath for output # @@ -191,9 +190,9 @@ def __init__(self, cursor, workdir, dd, **kwargs): resulttbl = 'result.tsv' elif(self.format == 'json'): resulttbl = 'result.json' - + self.outpath = self.workdir + '/' + resulttbl - + if self.debug: pathstr = str(self.outpath) logging.debug('') @@ -220,14 +219,14 @@ def __init__(self, cursor, workdir, dd, **kwargs): logging.debug('------------------------------------------------') for desc in self.cursor.description: - name = desc[0] + desc[0] coltype = desc[1] - dispsize = desc[2] - internalsz = desc[3] + desc[2] + desc[3] precision = desc[4] - scale = desc[5] - nullok = desc[6] - + scale = desc[5] + desc[6] + # @@ -255,9 +254,9 @@ def __init__(self, cursor, workdir, dd, **kwargs): # varchr, sting -- char, # int, integer, long, etc.. -- int (32bit), # - # oracle: dbtype == NUMBER, check first batch of data to see if - # it is int or float: if any number in this batch is float, - # consider the column dtype as double + # oracle: dbtype == NUMBER, check first batch of data to see if + # it is int or float: if any number in this batch is float, + # consider the column dtype as double # # pgsql: dbtype == decimal, assume it is double # @@ -268,19 +267,19 @@ def __init__(self, cursor, workdir, dd, **kwargs): dbdatatype = None - precision = None + precision = None scale = None - size = None + size = None i = 0 for col in self.cursor.description: # # { cursor description for loop: - # + # # cursor description is a list with ncols tuples: # each tuple contains each output colname's name, datatype, - # dbdatatype, size, precision, scale but most DBMSs' - # implementation are incomplete, only name and dbdatatype + # dbdatatype, size, precision, scale but most DBMSs' + # implementation are incomplete, only name and dbdatatype # are reliably implemented. # @@ -317,14 +316,14 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug('') - logging.debug(f'analyze description array:') + logging.debug('analyze description array:') logging.debug(f'dbms= {self.dbms:s}') if (self.dbms.lower() == 'oracle'): # # { oracle datatype from descriptor - # + # if self.debug: logging.debug('') logging.debug('oracle:') @@ -341,7 +340,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): ind = dbdatatypestr.find("VARCHAR") if(ind != -1): dbdatatype = 'VARCHAR' - + ind = dbdatatypestr.find("STRING") if(ind != -1): dbdatatype = 'STRING' @@ -369,7 +368,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): ind = dbdatatypestr.find("FLOAT") if(ind != -1): dbdatatype = 'FLOAT' - + if self.debug: logging.debug('') logging.debug(f'dbdatatype = {dbdatatype:s}') @@ -381,7 +380,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): elif (self.dbms.lower() == 'pgsql'): # # { pgsql datatype from descriptor - # + # if self.debug: logging.debug('') logging.debug('pgsql:') @@ -439,18 +438,18 @@ def __init__(self, cursor, workdir, dd, **kwargs): elif (self.dbms.lower() == 'mysql'): # # { mysql datatype from descriptor - # + # if self.debug: logging.debug('') - logging.debug(f'mysql:') + logging.debug('mysql:') logging.debug('col[1]:') logging.debug(col[1]) coltype = int(col[1]) - + if (coltype == 253): dbdatatype = 'VARCHAR' - + if (coltype == 3): dbdatatype = 'LONG' @@ -479,10 +478,10 @@ def __init__(self, cursor, workdir, dd, **kwargs): elif (self.dbms.lower() == 'sqlite3'): # # { mysql datatype from descriptor - # + # if self.debug: logging.debug('') - logging.debug(f'sqlite3:') + logging.debug('sqlite3:') dbdatatype = '' # @@ -495,14 +494,14 @@ def __init__(self, cursor, workdir, dd, **kwargs): logging.debug(f'dbdatatype = {dbdatatype:s}') logging.debug(f'size = {size:d}') - except Exception as e: - logging.debug(f'dbdatatype not set.') + except Exception: + logging.debug('dbdatatype not set.') pass if(col[2] is not None): size = int(col[2]) - + if self.debug: logging.debug(f'size= {size:d}') @@ -562,7 +561,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): try: if len(units) <= 0: units = '' - except Exception as e: + except Exception: units = '' isddcolarr.append(1) @@ -582,10 +581,10 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug('') - logging.debug(f'char type col in dd') + logging.debug('char type col in dd') logging.debug('size=') logging.debug(size) - + if (len(colname) > width): width = len(colname) @@ -671,7 +670,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): width = 80 if ((size is not None) and (size > 0)): width = size - + if(len(colname) > width): width = len(colname) @@ -840,7 +839,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): # { dbdatatype == numeric: this is pgsql oid=1700 special dtype, # if no dd, then we assume it is a double number, although # this dbdatatype is often used to represent a large integer. - # + # coltype = 'double' dbtype = dbdatatype @@ -856,7 +855,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): fmt = str(width) + '.14e' # - # } end dbdatatype == numeric + # } end dbdatatype == numeric # else: @@ -883,12 +882,12 @@ def __init__(self, cursor, workdir, dd, **kwargs): # # - # Special cases: if colname == 'RA' or 'DEC' use + # Special cases: if colname == 'RA' or 'DEC' use # racol, decol in dd # if (dd is not None): - + if(colname.lower() == 'ra'): if((dbdatatype == 'NUMBER') and \ @@ -948,7 +947,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): # to avoid excess data transfer) # - len_arr = len(namearr) + len(namearr) ddlist.append(namearr) ddlist.append(typearr) @@ -984,14 +983,14 @@ def __init__(self, cursor, workdir, dd, **kwargs): try: if self.debug: logging.debug('') - logging.debug(f'Calling writerecs()[1]:') - logging.debug(f'format: ' + str(self.format)) - logging.debug(f'ddlist: ' + str(ddlist)) - logging.debug(f'rowslist: ' + str(rowslist)) - logging.debug(f'ishdr: ' + str(self.ishdr)) - logging.debug(f'coldesc: ' + str(self.coldesc)) - logging.debug(f'overflow: ' + str(self.overflow)) - logging.debug(f'istail: ' + str(self.istail)) + logging.debug('Calling writerecs()[1]:') + logging.debug('format: ' + str(self.format)) + logging.debug('ddlist: ' + str(ddlist)) + logging.debug('rowslist: ' + str(rowslist)) + logging.debug('ishdr: ' + str(self.ishdr)) + logging.debug('coldesc: ' + str(self.coldesc)) + logging.debug('overflow: ' + str(self.overflow)) + logging.debug('istail: ' + str(self.istail)) istatus = writerecs.writerecs(self.outpath, self.format, ddlist, rowslist, self.ishdr, @@ -1074,7 +1073,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): #if (self.dbms.lower() == 'mysql'): # rows = cursor.fetchmany (self.cursor.arraysize) - #else: + #else: # rows = cursor.fetchmany(self.cursor.arraysize) rowslist = [] @@ -1109,14 +1108,14 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug('') - logging.debug(f'Calling writerecs()[2]:') - logging.debug(f'format: ' + str(self.format)) - logging.debug(f'ddlist: ' + str(ddlist)) - logging.debug(f'rowslist: ' + str(rowslist)) - logging.debug(f'ishdr: ' + str(self.ishdr)) - logging.debug(f'coldesc: ' + str(self.coldesc)) - logging.debug(f'overflow: ' + str(self.overflow)) - logging.debug(f'istail: ' + str(self.istail)) + logging.debug('Calling writerecs()[2]:') + logging.debug('format: ' + str(self.format)) + logging.debug('ddlist: ' + str(ddlist)) + logging.debug('rowslist: ' + str(rowslist)) + logging.debug('ishdr: ' + str(self.ishdr)) + logging.debug('coldesc: ' + str(self.coldesc)) + logging.debug('overflow: ' + str(self.overflow)) + logging.debug('istail: ' + str(self.istail)) istatus = writerecs.writerecs(self.outpath, self.format, ddlist, rowslist, self.ishdr, @@ -1139,7 +1138,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): raise Exception(str(e)) if self.debug: - logging.debug (f'return') + logging.debug ('return') logging.debug('') return @@ -1152,12 +1151,12 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug ( \ f'nrec = {nrec:d} and ibatch = {ibatch:d}') - logging.debug (f'break to write last batch of data') + logging.debug ('break to write last batch of data') break # - # } end batch > 0 + # } end batch > 0 # # # } end nrec = 0 @@ -1167,11 +1166,11 @@ def __init__(self, cursor, workdir, dd, **kwargs): # the column "description" block returned by the DBMS # does not give any datatypes. For those cases where # we don't have a priori schema information, we have to - # wait for the first data return to determine types. + # wait for the first data return to determine types. # This is complicated by the possibility that for some # records some columns are sometimes NULL (value 'None'). - # So while we don't want to scan all the data, we have + # So while we don't want to scan all the data, we have # to scan until we find some value for every column. if ((ibatch == 0) and (self.dbms.lower() == 'sqlite')): @@ -1246,7 +1245,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): # # {if pgsql dbtype = 'numeric', dtype will be decimal, # make it integer or float depending on the python type - # of typearr value + # of typearr value # dtype = type (row[i]).__name__ @@ -1292,12 +1291,12 @@ def __init__(self, cursor, workdir, dd, **kwargs): # - # For DBMSs like Oracle that store numbers as generic + # For DBMSs like Oracle that store numbers as generic # NUMBER type, we need to try to distinguish between - # int and double by checking the output row[i]'s + # int and double by checking the output row[i]'s # python datatype -- which might not be completely - # accurate so we check the whole column to make - # summary determination + # accurate so we check the whole column to make + # summary determination # if ((self.dbms.lower() == 'oracle') and \ (ibatch == 0) and \ @@ -1349,7 +1348,7 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug('----------------------------------------') - + # # The following block determines the NUMBER type ORACLE arrays's # datatype -- whether the column should be double or int based @@ -1418,21 +1417,21 @@ def __init__(self, cursor, workdir, dd, **kwargs): if self.debug: logging.debug('') - logging.debug(f'Calling writerecs()[3]:') - logging.debug(f'format: ' + str(self.format)) - logging.debug(f'ddlist: ' + str(ddlist)) - logging.debug(f'rowslist: ' + str(rowslist)) - logging.debug(f'ishdr: ' + str(self.ishdr)) - logging.debug(f'coldesc: ' + str(self.coldesc)) - logging.debug(f'overflow: ' + str(self.overflow)) - logging.debug(f'istail: ' + str(self.istail)) - logging.debug(f'cursor.arraysize' + str(self.cursor.arraysize)) + logging.debug('Calling writerecs()[3]:') + logging.debug('format: ' + str(self.format)) + logging.debug('ddlist: ' + str(ddlist)) + logging.debug('rowslist: ' + str(rowslist)) + logging.debug('ishdr: ' + str(self.ishdr)) + logging.debug('coldesc: ' + str(self.coldesc)) + logging.debug('overflow: ' + str(self.overflow)) + logging.debug('istail: ' + str(self.istail)) + logging.debug('cursor.arraysize' + str(self.cursor.arraysize)) istatus = writerecs.writerecs(self.outpath, self.format, ddlist, rowslist, self.ishdr, self.coldesc, self.overflow, self.istail) - + if self.debug: logging.debug('returned writerecs.writerecs') diff --git a/ctest/ctap.py b/ctest/ctap.py index baed052..76e2f04 100755 --- a/ctest/ctap.py +++ b/ctest/ctap.py @@ -3,10 +3,9 @@ # This example is as much documentation as processing. In a real # application we would only need parts of it. -from TAP.tapstore import tapStore -from TAP.taputil import tapUtil -from TAP.tapquery import tapQuery - +from TAP.tapquery import tapQuery +from TAP.tapstore import tapStore +from TAP.taputil import tapUtil # We may run into problems while connecting configuratio information, # translating the ADQL query into SQL, or submitting the user query @@ -42,7 +41,7 @@ def tapc(catalog, select_stmt, result_tbl): cursor = conn.cursor() cursor.execute('select count(*) from ' + catalog) rowcount = cursor.fetchone()[0] - + # But the main purpose of TAP is to run ADQL queries, including spatial constraints # and output formatting (output file type, column width, and number formatting). @@ -58,7 +57,7 @@ def tapc(catalog, select_stmt, result_tbl): # The returned 'query' object has several internal parameters but the ones most - # useful for further processing are the return status, the output file name + # useful for further processing are the return status, the output file name # (though this is just the name we put in above), and the return record count. if status == 'OK': @@ -66,5 +65,5 @@ def tapc(catalog, select_stmt, result_tbl): else: return status, msg, 0, 0 - except Exception as e: + except Exception: return 'ERROR', msg, 0, 0 diff --git a/ctest/dbtap.py b/ctest/dbtap.py index 84f5777..2fcfa19 100644 --- a/ctest/dbtap.py +++ b/ctest/dbtap.py @@ -1,6 +1,6 @@ -from TAP.tapstore import tapStore -from TAP.taputil import tapUtil -from TAP.tapquery import tapQuery +from TAP.tapquery import tapQuery +from TAP.tapstore import tapStore +from TAP.taputil import tapUtil dbutils = None dbconfig = None @@ -20,9 +20,9 @@ def dbinit(): dbconn = dbstore.getConn() dbadql = dbstore.getADQL() - except Exception as e: + except Exception: return 0 - + return 1 @@ -38,13 +38,13 @@ def dbquery(adql_string, filename): try: format = 'ipac' - cursor = dbconn.cursor() - + dbconn.cursor() + sql_string = dbadql.sql(adql_string) dbQuery = tapQuery(connectInfo=dbconfig.connectInfo, - conn=dbconn, - query=sql_string, + conn=dbconn, + query=sql_string, filename=filename, format=format) @@ -59,6 +59,6 @@ def dbquery(adql_string, filename): else: return status, msg, 0 - except Exception as e: + except Exception: return status, msg, 0 diff --git a/load_schema/load_schema.py b/load_schema/load_schema.py index 2f1a879..b16d782 100644 --- a/load_schema/load_schema.py +++ b/load_schema/load_schema.py @@ -1,7 +1,7 @@ #!/bin/env python -import sqlite3 import pprint +import sqlite3 pp = pprint.PrettyPrinter() @@ -29,11 +29,11 @@ # worked out once; from here on if we were to add tables we would just need # to add one "tables" record and its "columns" records. -# Finally, there are two more TAP_SCHEMA tables describing how foreign keys +# Finally, there are two more TAP_SCHEMA tables describing how foreign keys # in our tables relate the tables to each other (i.e., allow JOINS). Our one # real data table has no such keys but all these TAP_SCHEMA tables themselves # do have such relationships (and so must be included). However, unless we add -# new data tables which can join with each other, we never need to touch these +# new data tables which can join with each other, we never need to touch these # two tables again. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c952242 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "-ra" + +[tool.ruff] +line-length = 120 +target-version = "py39" +exclude = [ + "build", + "dist", + "docs", + "Sphinx", # Sphinx config conventionally puts imports below comments + "Jupyter", + "tests/fixtures", +] + +[tool.ruff.lint] +select = ["E", "F", "W", "I"] +ignore = [ + # Existing code style choices we're not touching in this pass: + "E501", # line too long — many long log strings and SQL literals + "E701", # multiple statements on one line — common in if-on-one-line style + "E711", # comparison to None — code uses `== None` extensively + "E712", # comparison to True/False — code uses `== True` extensively + "E722", # bare except — code uses bare except in many places + "E741", # ambiguous variable name — `l` used in several loops + # Pre-existing real bugs to address in a follow-up; intentionally not + # fixed in the CI-setup PR to keep the diff focused. + # See: TAP/taputil.py:221,223,227,246,251,261 (SQLite + MySQL branches) + # TAP/tap.py:2402 (`result` typo in a debug log path) + "F821", # undefined-name +] diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..343732f --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,18 @@ +pytest>=7 +ruff>=0.1 +requests + +# Runtime deps. These are not declared in setup.py's install_requires (a +# separate cleanup) but the package imports them. The CI install uses +# --no-deps for nexsciTAP itself (to avoid the spatial_index PyPI hole), +# so we list everything actually-needed here. +configobj +sqlparse +astropy +beautifulsoup4 +xmltodict + +# pyneid is not currently used — tests/test_pyneid_compat.py is skipped +# at module level until async-flow fixture support lands. Listed as a +# reminder; comment back in when re-enabling. +# pyneid diff --git a/setup.py b/setup.py index afee75a..ac52e50 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,16 @@ -import os -from setuptools import setup from distutils.extension import Extension -setup(name='nexsciTAP', +from setuptools import setup + +setup(name='nexsciTAP', version='3.0.0', - author='John Good', - author_email='jcg@caltech.edu', - license='LICENSE', - keywords='astronomy database ADQL web-service', + author='John Good', + author_email='jcg@caltech.edu', + license='LICENSE', + keywords='astronomy database ADQL web-service', url = 'https://github.com/Caltech-IPAC/nexsciTAP', - description='NExScI VO Table Access Protocol (TAP) web service', + description='NExScI VO Table Access Protocol (TAP) web service', long_description=open('README.md').read(), ext_modules=[Extension('TAP/writerecs', ['TAP/writerecsmodule.c'])], install_requires=['ADQL', 'spatial_index', 'configobj'], diff --git a/tests/_helpers.py b/tests/_helpers.py new file mode 100644 index 0000000..11873ce --- /dev/null +++ b/tests/_helpers.py @@ -0,0 +1,22 @@ +"""Shared test utility functions. Not auto-discovered by pytest (no `test_` prefix).""" +from __future__ import annotations + + +def tap_sync_url(server: str) -> str: + """The TAP sync endpoint URL relative to *server*.""" + return f"{server}/cgi-bin/TAP/nph-tap.py/sync" + + +def tap_async_url(server: str) -> str: + """The TAP async endpoint URL relative to *server*.""" + return f"{server}/cgi-bin/TAP/nph-tap.py/async" + + +def row_count_from_ipac(body: str) -> int: + """Count data rows in an IPAC ASCII table body. + + IPAC format has 4 header lines (names, types, units, nulls) followed by + data rows. Empty trailing lines are ignored. + """ + lines = [L for L in body.splitlines() if L.strip()] + return max(0, len(lines) - 4) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6aa0cf6 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,208 @@ +"""Shared pytest fixtures. + +Spins up a per-session HTTP server backed by SQLite fixtures and the +production CGI shim, so tests can exercise the full HTTP→CGI→TAP→DB pipeline. + +Layout under the session tmpdir: + + / + tap_schema.db — populated TAP_SCHEMA + test_data.db — data_l0, data_l1, test_access, test_users + TAP.conf — substituted from fixtures/TAP.conf.template + workdir/ — TAP_WORKDIR (request artifacts) + cgi-bin/ + TAP/ + nph-tap.py — copy of fixtures/nph-tap.py +""" +from __future__ import annotations + +import http.server +import os +import shutil +import socket +import sys +import threading +import time +from pathlib import Path + +import pytest + +TESTS_DIR = Path(__file__).parent +FIXTURES = TESTS_DIR / "fixtures" +STUBS = FIXTURES / "stubs" +sys.path.insert(0, str(FIXTURES)) +sys.path.insert(0, str(TESTS_DIR)) +sys.path.insert(0, str(STUBS)) + +import build_test_db # noqa: E402 + + +def _free_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + return s.getsockname()[1] + + +@pytest.fixture(scope="session") +def fixture_root(tmp_path_factory) -> Path: + """Build all the per-session artifacts and return the tmpdir root.""" + root = tmp_path_factory.mktemp("nexscitap-ci") + + build_test_db.build_all(str(root)) + + (root / "workdir").mkdir() + cgi_dir = root / "cgi-bin" / "TAP" + cgi_dir.mkdir(parents=True) + shutil.copy(FIXTURES / "nph-tap.py", cgi_dir / "nph-tap.py") + (cgi_dir / "nph-tap.py").chmod(0o755) + + return root + + +@pytest.fixture(scope="session") +def tap_conf(fixture_root: Path) -> Path: + """Materialize TAP.conf with absolute paths substituted in.""" + port = _free_port() + template = (FIXTURES / "TAP.conf.template").read_text() + conf = template.format( + TEST_WORKDIR=str(fixture_root / "workdir"), + TEST_HTTP_URL=f"http://127.0.0.1:{port}", + TEST_DB_PATH=str(fixture_root / "test_data.db"), + TEST_TAP_SCHEMA=str(fixture_root / "tap_schema.db"), + ) + conf_path = fixture_root / "TAP.conf" + conf_path.write_text(conf) + # Stash the port so tap_server can reuse it + (fixture_root / ".port").write_text(str(port)) + return conf_path + + +class _NphCGIHandler(http.server.BaseHTTPRequestHandler): + """Minimal nph-CGI runner. + + The TAP CGI script writes its own HTTP status line (it's an `nph-` / + non-parsed-headers script in Apache parlance — see TAP/tap.py:755, 2538, + etc.). Python's stdlib CGIHTTPRequestHandler is not nph-aware: it always + prepends a `HTTP/1.1 200 Script output follows` status line, so the + script's status line ends up looking like a header to the client. + + This handler runs the CGI subprocess and forwards its raw stdout to the + socket without modification — exactly what an Apache nph- script does. + """ + + nph_script_path = "" # set by the tap_server fixture before serve_forever + + def do_GET(self): + self._run_cgi() + + def do_POST(self): + self._run_cgi() + + def _run_cgi(self): + # Parse PATH_INFO and QUERY_STRING from self.path. + from urllib.parse import urlsplit + + parts = urlsplit(self.path) + # Two URL patterns are supported, both mapping to nph-tap.py: + # /cgi-bin/TAP/nph-tap.py/ — direct CGI URL + # /TAP/ — short form pyNEID uses, + # mimicking the Apache + # rewrite KOA/NEID/NEA + # run in production. + path = parts.path + marker = "/nph-tap.py" + if marker in path: + path_info = path.split(marker, 1)[1] + elif path.startswith("/TAP/"): + path_info = path[len("/TAP"):] # keep leading slash + else: + path_info = "" + + # Read request body (if any) for the subprocess stdin. + content_length = int(self.headers.get("Content-Length", "0") or "0") + body = self.rfile.read(content_length) if content_length else b"" + + env = os.environ.copy() + env.update({ + "GATEWAY_INTERFACE": "CGI/1.1", + "SERVER_PROTOCOL": "HTTP/1.1", + "SERVER_SOFTWARE": "test-nph-cgi/1.0", + "REQUEST_METHOD": self.command, + "QUERY_STRING": parts.query, + "PATH_INFO": path_info, + "SCRIPT_NAME": "/cgi-bin/TAP/nph-tap.py", + "REMOTE_ADDR": self.client_address[0], + "CONTENT_TYPE": self.headers.get("Content-Type", ""), + "CONTENT_LENGTH": str(content_length), + "HTTP_HOST": self.headers.get("Host", ""), + }) + + import subprocess + proc = subprocess.Popen( + [sys.executable, self.server.nph_script_path], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env, + ) + stdout, stderr = proc.communicate(input=body) + + # Forward raw subprocess stdout to the socket. The script is + # responsible for emitting a valid HTTP status line + headers. + try: + self.wfile.write(stdout) + self.wfile.flush() + except BrokenPipeError: + pass + + # Quieter logging during test runs. + def log_message(self, format, *args): + return + + +@pytest.fixture(scope="session") +def tap_server(fixture_root: Path, tap_conf: Path): + """Start http.server with the CGI handler. Yields the base URL.""" + port = int((fixture_root / ".port").read_text()) + + os.environ["TAP_CONF"] = str(tap_conf) + os.environ["TAP_PORT"] = str(port) + + # CGIHTTPRequestHandler runs each request as a subprocess; it inherits + # the parent's environment but not the in-process sys.path. Add the + # repo root to PYTHONPATH so `from TAP import tap` works in the CGI + # subprocess even if the package isn't pip-installed (CI installs the + # package; local pytest may not). + repo_root = TESTS_DIR.parent + paths = [str(repo_root), str(STUBS)] + existing = os.environ.get("PYTHONPATH", "") + if existing: + paths.append(existing) + os.environ["PYTHONPATH"] = os.pathsep.join(paths) + + original_cwd = os.getcwd() + os.chdir(fixture_root) + + httpd = http.server.HTTPServer(("127.0.0.1", port), _NphCGIHandler) + httpd.allow_reuse_address = True + httpd.nph_script_path = str(fixture_root / "cgi-bin" / "TAP" / "nph-tap.py") + thread = threading.Thread(target=httpd.serve_forever, daemon=True) + thread.start() + + # Brief wait for the server to come up + for _ in range(50): + try: + with socket.create_connection(("127.0.0.1", port), timeout=0.2): + break + except OSError: + time.sleep(0.05) + + base_url = f"http://127.0.0.1:{port}" + try: + yield base_url + finally: + httpd.shutdown() + httpd.server_close() + os.chdir(original_cwd) + + diff --git a/tests/fixtures/TAP.conf.template b/tests/fixtures/TAP.conf.template new file mode 100644 index 0000000..e0280d2 --- /dev/null +++ b/tests/fixtures/TAP.conf.template @@ -0,0 +1,45 @@ +# TAP.conf template for CI tests. +# +# The pytest `tap_conf` fixture in tests/conftest.py substitutes the +# placeholders below with absolute paths into the per-session tmpdir. +# +# Placeholders: +# {TEST_WORKDIR} — working dir for TAP request artifacts +# {TEST_HTTP_URL} — http://localhost:NNNN (fixture-allocated) +# {TEST_DB_PATH} — absolute path to test_data.db +# {TEST_TAP_SCHEMA} — absolute path to tap_schema.db + +[WEB] + TAP_WORKDIR={TEST_WORKDIR} + TAP_WORKURL=/workspace + + HTTP_URL={TEST_HTTP_URL} + HTTP_PORT=0 + CGI_PGM=TAP + + +# Default configuration instance — no `instance=` HTTP parameter needed. +[DBMS] + DBMS=sqlite3 + DB={TEST_DB_PATH} + TAP_SCHEMA={TEST_TAP_SCHEMA} + TAP_SCHEMA_FILE=TAP_SCHEMA + + # Propfilter setup — exercises the propflag guard added in PR #17. + ACCESS_TBL=test_access + USERS_TBL=test_users + FILEID=l0filename + ACCESSID=program + PROPFILTER=neid + RACOL=ra + DECCOL=dec + + +[SPTIND] + MODE=HTM + LEVEL=20 + XCOL=x + YCOL=y + ZCOL=z + COLNAME=sptind + ENCODING=BASE10 diff --git a/tests/fixtures/build_test_db.py b/tests/fixtures/build_test_db.py new file mode 100644 index 0000000..caa40ab --- /dev/null +++ b/tests/fixtures/build_test_db.py @@ -0,0 +1,344 @@ +"""Build the SQLite test fixtures: tap_schema.db and test_data.db. + +Used by the pytest `test_db` session fixture in tests/conftest.py. Can also +be run standalone for local inspection: + + python tests/fixtures/build_test_db.py /tmp/fixture-out +""" +from __future__ import annotations + +import os +import sqlite3 +import sys +from datetime import datetime, timedelta + + +# ---------------------------------------------------------------- TAP_SCHEMA + +TAP_SCHEMA_DDL = [ + """ + CREATE TABLE IF NOT EXISTS schemas ( + schema_name TEXT NOT NULL, + utype TEXT, + description TEXT, + schema_index INTEGER, + PRIMARY KEY (schema_name) + ) + """, + """ + CREATE TABLE IF NOT EXISTS tables ( + schema_name TEXT NOT NULL, + table_name TEXT NOT NULL, + table_type TEXT, + utype TEXT, + description TEXT, + table_index INTEGER, + PRIMARY KEY (table_name) + ) + """, + """ + CREATE TABLE IF NOT EXISTS columns ( + table_name TEXT NOT NULL, + column_name TEXT NOT NULL, + datatype TEXT, + arraysize TEXT, + size INTEGER, + description TEXT, + utype TEXT, + unit TEXT, + ucd TEXT, + indexed INTEGER, + principal INTEGER, + std INTEGER, + column_index INTEGER, + PRIMARY KEY (table_name, column_name) + ) + """, + """ + CREATE TABLE IF NOT EXISTS keys ( + key_id TEXT NOT NULL, + from_table TEXT, + target_table TEXT, + description TEXT, + utype TEXT, + PRIMARY KEY (key_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS key_columns ( + key_id TEXT NOT NULL, + from_column TEXT, + target_column TEXT, + PRIMARY KEY (key_id, from_column) + ) + """, +] + +SCHEMAS = [ + ("test", "Test data for nexsciTAP CI", 1), + ("TAP_SCHEMA", "Standard TAP schema description.", 2), +] + +# Only data_l0 and data_l1 are reachable as query targets; test_access and +# test_users are propfilter-internal tables that the validator allows in +# subqueries by way of the ACCESS_TBL/USERS_TBL config entries. +TABLES = [ + # (schema, table, description) + ("test", "data_l0", "Public-data test table (propint=0)."), + ("test", "data_l1", "Proprietary test table (mix of embargoed and released)."), + ("test", "test_access", "Propfilter access table — internal."), + ("test", "test_users", "Propfilter users table — internal."), + ("TAP_SCHEMA", "schemas", "TAP_SCHEMA.schemas."), + ("TAP_SCHEMA", "tables", "TAP_SCHEMA.tables."), + ("TAP_SCHEMA", "columns", "TAP_SCHEMA.columns."), + ("TAP_SCHEMA", "keys", "TAP_SCHEMA.keys."), + ("TAP_SCHEMA", "key_columns", "TAP_SCHEMA.key_columns."), +] + +# Columns for data_l0 and data_l1 — shape mirrors NEID's neidl0/neidl1 enough +# to drive the propfilter code paths and the UI's CASE-WHEN download-link +# query pattern. Keep small; only the columns used by tests need to exist. +DATA_L0_COLUMNS = [ + ("data_l0", "obsdate", "char", "DATE", "Observation date."), + ("data_l0", "obsjd", "double", None, "Julian date of observation."), + ("data_l0", "program", "char", None, "Program ID."), + ("data_l0", "piname", "char", None, "PI name."), + ("data_l0", "obsmode", "char", None, "Observation mode."), + ("data_l0", "obstype", "char", None, "Sci or Cal."), + ("data_l0", "l0propint", "int", "months", "Proprietary interval (months)."), + ("data_l0", "l0filename", "char", None, "Filename."), + ("data_l0", "l0filehand", "char", None, "File handle."), + ("data_l0", "l0filepath", "char", None, "File path."), + ("data_l0", "object", "char", None, "Object name."), + ("data_l0", "qobject", "char", None, "Resolved object."), + ("data_l0", "qra", "char", "deg", "Query RA."), + ("data_l0", "qdec", "char", "deg", "Query Dec."), + ("data_l0", "ra", "double", "deg", "RA."), + ("data_l0", "dec", "double", "deg", "Dec."), +] +DATA_L1_COLUMNS = [(("data_l1",) + c[1:]) for c in DATA_L0_COLUMNS] +# Rename the l0-prefixed columns to l1-prefixed for data_l1 +DATA_L1_COLUMNS = [ + (t, name.replace("l0", "l1") if name.startswith("l0") else name, *rest) + for (t, name, *rest) in DATA_L1_COLUMNS +] + +# Minimal column metadata for the propfilter-internal and TAP_SCHEMA tables +# — TableValidator looks at table names not column names, but the data +# dictionary code path queries `select * from tap_schema.columns where +# table_name = ?` so the rows must exist. +ACCESS_COLUMNS = [ + ("test_access", "userid", "char", None, "User ID."), + ("test_access", "program", "char", None, "Program ID accessible to userid."), +] +USERS_COLUMNS = [ + ("test_users", "userid", "char", None, "User ID."), + ("test_users", "cookie", "char", None, "Session cookie."), +] +SELF_COLUMNS = [ + ("schemas", "schema_name", "char", None, ""), + ("tables", "schema_name", "char", None, ""), + ("tables", "table_name", "char", None, ""), + ("columns", "table_name", "char", None, ""), + ("columns", "column_name", "char", None, ""), + ("columns", "datatype", "char", None, ""), + ("columns", "unit", "char", None, ""), + ("columns", "description", "char", None, ""), +] + + +def build_tap_schema(path: str) -> None: + """Populate tap_schema.db at *path*.""" + if os.path.exists(path): + os.remove(path) + conn = sqlite3.connect(path) + cur = conn.cursor() + for ddl in TAP_SCHEMA_DDL: + cur.execute(ddl) + cur.executemany( + "INSERT INTO schemas (schema_name, description, schema_index) VALUES (?, ?, ?)", + SCHEMAS, + ) + # TableValidator expects table_name values to be schema-qualified + # (e.g. "TAP_SCHEMA.tables", not just "tables") so it can populate its + # allowed_schemas set from the dotted prefix. See tablevalidator.py:50-60. + qualified_tables = [ + (schema, f"{schema}.{table}", description) + for (schema, table, description) in TABLES + ] + cur.executemany( + "INSERT INTO tables (schema_name, table_name, description) VALUES (?, ?, ?)", + qualified_tables, + ) + all_cols = DATA_L0_COLUMNS + DATA_L1_COLUMNS + ACCESS_COLUMNS + USERS_COLUMNS + SELF_COLUMNS + cur.executemany( + """INSERT INTO columns (table_name, column_name, datatype, unit, description) + VALUES (?, ?, ?, ?, ?)""", + all_cols, + ) + conn.commit() + conn.close() + + +# ---------------------------------------------------------------- data tables + +def build_test_data(path: str) -> None: + """Populate test_data.db with the data_l0, data_l1, test_access, test_users tables.""" + if os.path.exists(path): + os.remove(path) + conn = sqlite3.connect(path) + cur = conn.cursor() + + # data_l0 — public (l0propint = 0). 6 rows across 2 programs. + cur.execute(""" + CREATE TABLE data_l0 ( + obsdate TEXT, + obsjd REAL, + program TEXT, + piname TEXT, + obsmode TEXT, + obstype TEXT, + l0propint INTEGER, + l0filename TEXT, + l0filehand TEXT, + l0filepath TEXT, + object TEXT, + qobject TEXT, + qra TEXT, + qdec TEXT, + ra REAL, + dec REAL + ) + """) + + now = datetime.now() + l0_rows = [] + for i in range(6): + d = now - timedelta(days=10 + i) + prog = "TEST-A" if i % 2 == 0 else "TEST-B" + l0_rows.append(( + d.strftime("%Y-%m-%d %H:%M:%S"), # obsdate + 2460000.0 + i, # obsjd + prog, # program + "Test PI", # piname + "hr", # obsmode + "Cal" if i < 2 else "Sci", # obstype + 0, # l0propint — PUBLIC + f"testL0_2026{i:04d}.fits", # l0filename + f"/handle/{i}", # l0filehand + f"/data/L0/testL0_2026{i:04d}.fits", # l0filepath + f"OBJ-{i}", # object + f"OBJ-{i}", # qobject + "10.0", # qra + "20.0", # qdec + 10.0, # ra + 20.0, # dec + )) + cur.executemany( + "INSERT INTO data_l0 VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", l0_rows + ) + + # data_l1 — proprietary mix. Critical fixture invariant: at least some + # rows must be STILL EMBARGOED (today < add_months(obsdate, l1propint)) + # so propfilter has something to filter out. Without this, propflag=0 + # vs propflag=1 returns identical counts and the security test can't + # detect a regression. + cur.execute(""" + CREATE TABLE data_l1 ( + obsdate TEXT, + obsjd REAL, + program TEXT, + piname TEXT, + obsmode TEXT, + obstype TEXT, + l1propint INTEGER, + l1filename TEXT, + l1filehand TEXT, + l1filepath TEXT, + object TEXT, + qobject TEXT, + qra TEXT, + qdec TEXT, + ra REAL, + dec REAL + ) + """) + l1_rows = [] + # 3 public rows (propint=0) + for i in range(3): + d = now - timedelta(days=30 + i) + l1_rows.append(( + d.strftime("%Y-%m-%d %H:%M:%S"), 2460500.0 + i, + "TEST-PUBLIC", "PI Public", "hr", "Sci", 0, + f"testL1_public_{i:02d}.fits", f"/h/p/{i}", f"/data/L1/p{i}.fits", + "STAR", "STAR", "10.0", "20.0", 10.0, 20.0, + )) + # 3 expired-embargo rows (propint=12 months, obsdate > 2 years ago) + for i in range(3): + d = now - timedelta(days=900 + i) + l1_rows.append(( + d.strftime("%Y-%m-%d %H:%M:%S"), 2459500.0 + i, + "TEST-EXPIRED", "PI Expired", "hr", "Sci", 12, + f"testL1_expired_{i:02d}.fits", f"/h/e/{i}", f"/data/L1/e{i}.fits", + "STAR", "STAR", "10.0", "20.0", 10.0, 20.0, + )) + # 4 STILL-EMBARGOED rows (propint=24 months, obsdate < 6 months ago) + for i in range(4): + d = now - timedelta(days=60 + i * 10) + l1_rows.append(( + d.strftime("%Y-%m-%d %H:%M:%S"), 2461000.0 + i, + "TEST-PROPRIETARY", "PI Proprietary", "hr", "Sci", 24, + f"testL1_embargoed_{i:02d}.fits", f"/h/x/{i}", f"/data/L1/x{i}.fits", + "STAR", "STAR", "10.0", "20.0", 10.0, 20.0, + )) + cur.executemany( + "INSERT INTO data_l1 VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", l1_rows + ) + + # test_access — propfilter access table. Maps userid -> program. + cur.execute(""" + CREATE TABLE test_access ( + userid TEXT, + program TEXT + ) + """) + cur.executemany( + "INSERT INTO test_access VALUES (?, ?)", + [ + ("testuser", "TEST-PROPRIETARY"), + ], + ) + + # test_users — propfilter users table. Holds session token mapping. + cur.execute(""" + CREATE TABLE test_users ( + userid TEXT, + cookie TEXT + ) + """) + cur.executemany( + "INSERT INTO test_users VALUES (?, ?)", + [ + ("testuser", "fake-test-cookie"), + ], + ) + + conn.commit() + conn.close() + + +def build_all(out_dir: str) -> dict: + """Build both fixtures under *out_dir*. Returns {'tap_schema': path, 'test_data': path}.""" + os.makedirs(out_dir, exist_ok=True) + tap_schema_path = os.path.join(out_dir, "tap_schema.db") + test_data_path = os.path.join(out_dir, "test_data.db") + build_tap_schema(tap_schema_path) + build_test_data(test_data_path) + return {"tap_schema": tap_schema_path, "test_data": test_data_path} + + +if __name__ == "__main__": + out = sys.argv[1] if len(sys.argv) > 1 else "." + paths = build_all(out) + for k, v in paths.items(): + print(f"{k}: {v}") diff --git a/tests/fixtures/nph-tap.py b/tests/fixtures/nph-tap.py new file mode 100644 index 0000000..3912999 --- /dev/null +++ b/tests/fixtures/nph-tap.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +"""CGI entry point for the CI test fixture. + +Matches the production CGI shim documented at Sphinx/install.rst. The pytest +`tap_server` fixture in tests/conftest.py serves this file via +http.server.CGIHTTPRequestHandler with TAP_CONF set to the fixture's TAP.conf. +""" +from TAP import tap + +tap.Tap() diff --git a/tests/fixtures/stubs/spatial_index.py b/tests/fixtures/stubs/spatial_index.py new file mode 100644 index 0000000..356c803 --- /dev/null +++ b/tests/fixtures/stubs/spatial_index.py @@ -0,0 +1,22 @@ +"""Minimal stub of the spatial_index package for CI use. + +The real `spatial_index` package on PyPI only ships wheels for Python ≤ 3.8, +so the CI matrix (3.9 / 3.10 / 3.11 / 3.12) can't install it. TAP imports +the package unconditionally for four module-level constants — that's all +the surface we exercise in tests, since no test query uses spatial +geometry functions. + +If/when a test starts exercising spatial queries (POINT, CIRCLE, CONTAINS, +DISTANCE), this stub will need to grow or the real package needs to be +installed. + +Tracking the upstream issue: spatial-index 1.1.0 on PyPI lacks a source +distribution and modern-Python wheels. +""" + + +class SpatialIndex: + HTM = 1 + HPX = 2 + BASE4 = 10 + BASE10 = 16 diff --git a/tests/test_http_endpoint.py b/tests/test_http_endpoint.py new file mode 100644 index 0000000..6fd9377 --- /dev/null +++ b/tests/test_http_endpoint.py @@ -0,0 +1,89 @@ +"""Smoke tests for the CI HTTP fixture. + +These don't probe security boundaries (see test_security_e2e.py in the next +PR). They just confirm the fixture itself works: the CGI launches, the SQLite +DBs are wired in, and a few representative queries make it through the full +HTTP→CGI→TAP→SQLite→IPAC pipeline. +""" +from __future__ import annotations + +import requests +from _helpers import row_count_from_ipac, tap_sync_url + + +def test_server_starts(tap_server: str): + """The fixture comes up and the CGI returns something for a trivial GET.""" + r = requests.get(tap_server) + # Root path is not a CGI script — we expect a 200 with the directory + # listing or a 404; either confirms the server itself is alive. + assert r.status_code in (200, 404) + + +def test_sync_select_l0(tap_server: str): + """A trivial select against data_l0 returns 6 rows in IPAC format.""" + r = requests.post( + tap_sync_url(tap_server), + data={ + "query": "select l0filename, program from data_l0", + "format": "ipac", + }, + ) + assert r.status_code == 200, r.text[:500] + assert row_count_from_ipac(r.text) == 6 + + +def test_sync_select_l1(tap_server: str): + """data_l1 has 10 rows total (3 public + 3 expired + 4 embargoed).""" + r = requests.post( + tap_sync_url(tap_server), + data={ + "query": "select l1filename from data_l1", + "format": "ipac", + # propflag=0 → tapQuery path → no filter → all rows + "propflag": "0", + }, + ) + assert r.status_code == 200, r.text[:500] + assert row_count_from_ipac(r.text) == 10 + + +def test_sync_get_method(tap_server: str): + """Sync TAP must also accept GET (not just POST).""" + r = requests.get( + tap_sync_url(tap_server), + params={ + "query": "select l0filename from data_l0", + "format": "ipac", + }, + ) + assert r.status_code == 200, r.text[:500] + assert row_count_from_ipac(r.text) == 6 + + +def test_tap_schema_query(tap_server: str): + """tap_schema queries take a separate code path; confirm it works.""" + r = requests.post( + tap_sync_url(tap_server), + data={ + "query": "select table_name from tap_schema.tables", + "format": "ipac", + }, + ) + assert r.status_code == 200, r.text[:500] + # Our fixture registers 4 user tables + 5 TAP_SCHEMA self-tables = 9. + assert row_count_from_ipac(r.text) >= 9 + + +def test_csv_format(tap_server: str): + """The CSV serializer works.""" + r = requests.post( + tap_sync_url(tap_server), + data={ + "query": "select l0filename from data_l0", + "format": "csv", + }, + ) + assert r.status_code == 200, r.text[:500] + lines = [L for L in r.text.splitlines() if L.strip()] + # 1 header line + 6 data lines + assert len(lines) == 7 diff --git a/tests/test_pyneid_compat.py b/tests/test_pyneid_compat.py new file mode 100644 index 0000000..e927e0a --- /dev/null +++ b/tests/test_pyneid_compat.py @@ -0,0 +1,54 @@ +"""pyNEID compatibility regression tests. + +Currently all skipped — the test fixture's HTTP handler doesn't yet +support TAP's full async flow end-to-end. + +Why these matter: pyNEID's `query_adql` always uses the async TAP +endpoint, which works in three round-trips: + +1. POST to /TAP/async with the ADQL query → server returns 303 See Other + with a Location: header. +2. POST to with PHASE=RUN → server starts the job, returns + another 303 to the same status URL. +3. GET the status URL repeatedly until phase=COMPLETED, then GET the + results URL embedded in the status XML. + +Steps 2–3 depend on the server persisting per-request workspace state +(status.xml, results) under TAP_WORKDIR keyed by a workspace ID +embedded in the URL path. The current `_NphCGIHandler` in +tests/conftest.py routes by URL prefix but doesn't model the workspace +lifecycle — each request is a fresh CGI subprocess that doesn't know +about prior request state. Production runs under Apache, which manages +the same workdir mount across requests so the state naturally +persists, but reproducing that fidelity in CI needs: + +- Sticky working directory across requests (already mostly there since + we set `os.chdir(fixture_root)` once at fixture setup) +- Correct PATH_INFO routing for the second and subsequent calls in the + async flow (the Location URL pyNEID gets back may not match the URL + pattern the handler routes today) +- Possibly more debug — when this was tried locally, pyNEID hung at + step 2 with no observable error in the TAP debug log + +Equivalent direct-HTTP coverage of the same code paths exists in +tests/test_security_e2e.py and tests/test_http_endpoint.py — those +exercise the sync endpoint, which uses a single request and doesn't +need the workspace dance. The pyNEID layer adds value as a regression +check on top of those (it's the exact wrapper production traffic hits), +but is not blocking. + +To wire this up later: trace the async POST/poll loop, fix the handler +routing, then drop the skip decorator. +""" +from __future__ import annotations + +import pytest + +# Once the async fixture support lands, drop this module-level skip and +# re-add the actual test bodies (a snapshot of the intended tests is in +# the git history of this file or in the design plan). +pytest.skip( + "Async TAP flow not yet supported by the test fixture handler. " + "See module docstring for what's needed.", + allow_module_level=True, +) diff --git a/tests/test_security_e2e.py b/tests/test_security_e2e.py new file mode 100644 index 0000000..772ef38 --- /dev/null +++ b/tests/test_security_e2e.py @@ -0,0 +1,252 @@ +"""End-to-end security regression tests. + +Each test is named after the specific regression or security boundary it +guards. The intent is that any future change that re-opens one of these +holes turns the corresponding test red at PR time, making the regression +unmissable in code review. + +Actual TAP status-code behavior in v3.0.0: +- 403 only for `TableValidationError`. The validator raises this for + *system-catalog* references (e.g., sqlite_master, dba_users), and + the table whitelist raises it for tables not in TAP_SCHEMA.tables. +- 400 for everything else, including DML rejection, dangerous-function + rejection, semicolon-chained statements, and ordinary query errors. + This is inconsistent — DML/dangerous-func rejections are security + policy decisions and should arguably also be 403 — but the existing + code raises plain Exception for those, not TableValidationError. + Tests match the actual behavior; harmonizing the codes is a follow-up. +- 500 for startup failures. +""" +from __future__ import annotations + +import pytest +import requests +from _helpers import row_count_from_ipac, tap_sync_url + +# ---------------------------------------------------------------- helpers + +def _post(server: str, query: str, **params) -> requests.Response: + """POST a sync TAP query.""" + data = {"query": query, "format": "ipac"} + data.update(params) + return requests.post(tap_sync_url(server), data=data) + + +def _count(server: str, query: str, **params) -> int: + """Run a query and return the row count from the IPAC response. + + Asserts a 200 before counting — caller should expect success. + """ + r = _post(server, query, **params) + assert r.status_code == 200, f"expected 200 got {r.status_code}: {r.text[:300]}" + return row_count_from_ipac(r.text) + + +# ---------------------------------------------------------------- table whitelist + +def test_table_whitelist_rejects_system_catalog(tap_server: str): + """SQL injection class: system-catalog reads must be blocked. + + Regression marker: cd3d963 (initial table whitelist). + """ + r = _post(tap_server, "select * from sqlite_master") + assert r.status_code == 403, r.text[:300] + + +def test_table_whitelist_rejects_unregistered_table(tap_server: str): + """Tables not in TAP_SCHEMA.tables are 403, not 400 or 200.""" + r = _post(tap_server, "select * from not_in_tap_schema_at_all") + assert r.status_code == 403, r.text[:300] + + +def test_table_whitelist_allows_propfilter_internal_table_in_subquery(tap_server: str): + """The propfilter's access-control table is reachable from subqueries. + + Regression markers: bdbd2d7, de1941f. The fix excluded the configured + ACCESS_TBL / USERS_TBL from validation so the prop check could still + reference them in `WHERE program IN (SELECT program FROM test_access ...)`. + """ + q = ( + "select l1filename from data_l1 " + "where program in (select program from test_access where userid='') " + "or l1propint = 0" + ) + r = _post(tap_server, q, propflag="0") + assert r.status_code == 200, r.text[:300] + + +def test_order_by_not_treated_as_table(tap_server: str): + """ORDER BY columns must not be misidentified as table names. + + Regression marker: 1404e15. sqlparse tokenizes 'ORDER BY' as a single + keyword; the table extractor was treating columns after it as tables + until that fix. + """ + r = _post(tap_server, "select l0filename from data_l0 order by obsdate") + assert r.status_code == 200, r.text[:300] + assert row_count_from_ipac(r.text) == 6 + + +def test_unknown_statement_does_not_skip_validation(tap_server: str): + """Oracle-dialect queries using ROWNUM must still be validated. + + Regression marker: 867278c. sqlparse returns get_type() == 'UNKNOWN' + for ROWNUM-bearing queries; an earlier extractor skipped UNKNOWN + statements entirely, letting them through validation. The fix + extracts tables from UNKNOWN statements too. + """ + r = _post( + tap_server, + "select * from not_in_tap_schema where rownum < 10", + ) + assert r.status_code == 403, r.text[:300] + + +# ---------------------------------------------------------------- propflag + +def test_propflag_zero_honored_for_simple_l0(tap_server: str): + """L0 with propflag=0 must return all rows — the UI's L0 metadata + listing path. data_l0 has 6 rows total (all public, propint=0).""" + n = _count(tap_server, "select l0filename from data_l0", propflag="0") + assert n == 6 + + +@pytest.mark.skip( + reason="Cannot exercise the propflag guard with the SQLite fixture. " + "configparam.py only reads PROPFILTER/ACCESS_TBL/USERS_TBL " + "inside its `if self.dbms == 'oracle'` branch (~lines 255-345), " + "so on a SQLite config those keys are silently dropped and " + "self.config.propfilter ends up '' — neither the NEID nor the " + "KOA guard fires. This is a separate bug worth fixing (the " + "guard semantics should not be Oracle-only). End-to-end propflag " + "coverage will land with the deferred Oracle/Postgres/MySQL CI " + "matrix work." +) +def test_propflag_bypass_routes_l1_through_propfilter(tap_server: str): + """L1 with propflag=0 must route through propfilter, not bare tapQuery. + + Regression markers: PR #16 + PR #17. The guard in tap.py:1612-1638 + forces propflag=1 when datalevel is eng/l1/l2. Cannot be exercised + against the SQLite fixture (see skip reason).""" + # If/when configparam.py is fixed to read PROPFILTER for non-Oracle + # configs, this test will run and assert: + # - propflag=0 status_code != 200 (because propfilter then errors + # on add_months — fixing THAT is a separate concern) + # - propflag=0 status_code == propflag=1 status_code + raise AssertionError("Skipped — see decorator") + + +def test_propflag_whitelist_l0_complex_case_does_not_misfire(tap_server: str): + """The UI's L0 download-link query shape (CASE WHEN with a subquery + FROM nested in the SELECT list) with propflag=0 must succeed and + return rows. + + Regression marker: PR #16 introduced a bug where the propflag guard + used `datalevel != 'l0'`, which evaluates True for the empty + datalevel that __getDatalevel__ returns when the table-name parser + picks the subquery's table first. That bug routed the L0 query + through propfilter, which then constructed malformed prop-check SQL. + + PR #17 switched to an explicit `datalevel in ('eng','l1','l2')` + whitelist so the misfire stopped happening — when datalevel='', the + whitelist doesn't match, and the L0 query stays on the tapQuery path. + + Query structure is the regression trigger: a CASE WHEN containing a + `SELECT ... FROM test_access` subquery in the SELECT list. The + table-name parser yields `test_access` first; __getDatalevel__ on + 'test_access' returns ''. Production used `add_months(obsdate, + l0propint)` in the CASE condition; SQLite doesn't have add_months, + so this test uses a comparison the table-name parser still treats + identically. + """ + q = ( + "select (case when program in " + "(SELECT program from test_access where userid = '') " + "then l0filename else 'redacted' end) as fn, " + "l0filename, program from data_l0" + ) + r = _post(tap_server, q, propflag="0") + assert r.status_code == 200, r.text[:300] + assert row_count_from_ipac(r.text) == 6 + + +# ---------------------------------------------------------------- statement validator + +def test_statement_validator_rejects_dml(tap_server: str): + """INSERT/UPDATE/DELETE/DROP must all be rejected (status 400). + + `validate_statement` raises plain Exception (not TableValidationError) + for these, so the response is 400 rather than 403. Arguably they + should also be 403 — security policy rejection rather than malformed + query — but harmonizing the codes is a separate follow-up. + """ + for dml in ( + "insert into data_l0 (l0filename) values ('x')", + "update data_l0 set l0filename = 'x'", + "delete from data_l0", + "drop table data_l0", + ): + r = _post(tap_server, dml) + assert r.status_code == 400, f"{dml!r} -> {r.status_code}: {r.text[:200]}" + assert "not permitted" in r.text or "rejected" in r.text, r.text[:300] + + +def test_statement_validator_rejects_oracle_dangerous_functions(tap_server: str): + """utl_http, dbms_sql, sys_context etc. must be rejected (status 400). + + Same 400-vs-403 inconsistency as DML rejection — see that test. + """ + for sql in ( + "select utl_http.request('http://evil/x') from data_l0", + "select dbms_sql.parse(1) from data_l0", + "select sys_context('USERENV','IP_ADDRESS') from data_l0", + ): + r = _post(tap_server, sql) + assert r.status_code == 400, f"{sql!r} -> {r.status_code}: {r.text[:200]}" + assert "not permitted" in r.text, r.text[:300] + + +def test_statement_validator_rejects_semicolon_chained(tap_server: str): + """Semicolon-chained statements (classic SQL injection) are rejected. + + Same 400-vs-403 inconsistency as DML rejection. + """ + r = _post( + tap_server, + "select l0filename from data_l0; drop table data_l0", + ) + assert r.status_code == 400, r.text[:300] + assert "semicolon" in r.text.lower(), r.text[:300] + + +# ---------------------------------------------------------------- HTTP status codes + +def test_http_status_code_400_for_query_error(tap_server: str): + """A query referencing a missing column on a valid table → 400.""" + r = _post(tap_server, "select column_that_does_not_exist from data_l0") + # The column doesn't exist; SQLite/TAP returns this as a query error, + # which is 400 (not 403, since the table itself is valid). + assert r.status_code == 400, r.text[:300] + + +# ---------------------------------------------------------------- error message safety + +def test_table_reject_error_message_is_safe(tap_server: str): + """Error responses for rejected tables must not leak internal paths + or expose stack-trace internals. + + Regression marker: 4fa46ff (generic startup error messages) + + c202285 (don't leak raw Oracle errors). This test is a sanity check + that the error response for a table rejection stays clean. + """ + r = _post(tap_server, "select * from sqlite_master") + body = r.text + # No bare file paths (full /tmp/, /var/, /Users/ etc.) + forbidden_substrings = [ + "Traceback", + "Password=", + "/tmp/tap_", + "site-packages", + ] + for s in forbidden_substrings: + assert s not in body, f"Leaked {s!r} in error response:\n{body[:500]}" diff --git a/tests/test_tablevalidator.py b/tests/test_tablevalidator.py index 5faff1e..d91719f 100644 --- a/tests/test_tablevalidator.py +++ b/tests/test_tablevalidator.py @@ -3,7 +3,7 @@ import tempfile import unittest -from TAP.tablevalidator import TableValidator, TableValidationError +from TAP.tablevalidator import TableValidationError, TableValidator def _make_db(table_names, schema_name='TAP_SCHEMA', tables_table='tables'):