Python 3 + NTLM + Sharepoint

ntlmpythonsharepointsharepoint-2013

I am attempting to create a tool for interfacing with Sharepoint to start transitioning a number of old processes away from using Sharepoint as a relational database and move that data into an actual database and automate several manual tasks among other things.

The environment I am working with is a Sharepoint 2013 server using NTLM authentication. I need to use python3 to be able to use a set of libraries that will later be used to consume the data. Python is running via Anaconda 64 bit. I have no administrative power over the sharepoint server at all, just the ability to read and change information of certain pages.

The problem I am running into is authenticating using NTLM and the Sharepoint module together. The following snippet works just fine for pulling the HTML from the site as if it were a browser:

from <<password management library>> import KEY
from requests_ntlm import HttpNtlmAuth
import requests,getpass

#Configuration
domain='domain'
srv="sharepoint.company.com"
creds=KEY(srv,getpass.getuser()) #Secure credential retrieval
user='{0}\\{1}'.format(domain,creds.USERNAME).upper()
srvaddr='https://{0}'.format(srv)
site='/sites/path/to/site/'
site_url='{0}{1}'.format(srvaddr,site)

#Auth via Requests_NTLM
opener=HttpNtlmAuth(user,creds.getpass())

#Interface via Requests
s = sharepoint.SharePointSite(site_url, opener)
r = requests.get(srvaddr, auth=opener)

Using the documentation, source code provided in the sharepoint library's documentation and a few Stackoverflow posts (that refer to python 2 libraries), I attempted to replicate this using the sharepoint module. While the above is able to successfully authenticate, the following snippet gets a 401 Unauthorized Error:

import sharepoint,requests,getpass,urllib
from ntlm3.HTTPNtlmAuthHandler import HTTPNtlmAuthHandler
from <<password management library>> import KEY
from requests_ntlm import HttpNtlmAuth
from urllib.request import BaseHandler, HTTPPasswordMgrWithDefaultRealm, build_opener

#Configuration
domain='domain'
srv="sharepoint.company.com"
creds=KEY(srv,getpass.getuser()) #Secure credential retrieval
user='{0}\\{1}'.format(domain,creds.USERNAME).upper()
srvaddr='https://{0}'.format(srv)
site='/sites/path/to/site/'
site_url='{0}{1}'.format(srvaddr,site)

#Auth via urllib Opener
def basic_auth_opener(url, username, password):
    password_manager = HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, url, username, password)
    auth_handler = HTTPNtlmAuthHandler(password_manager) #PreemptiveBasicAuthHandler(password_manager)
    opener = build_opener(auth_handler)
    return opener

print('Auth as {0}@{1}'.format(user,srvaddr))

opener = basic_auth_opener(srvaddr, user, creds.getpass())
#urllib.request.install_opener(opener)
print('Open {0}'.format(site_url))

#Interface via Sharepoint module
s = sharepoint.SharePointSite(site_url, opener)
for sp_list in s.lists:
    print (sp_list.id, sp_list.meta['Title'])

I know that the credentials work and are authorized to access the site since the first snippet works just fine. The second simply gets a 401 error and I have been diving into the inner workings of urllib, requests, python-ntlm, and requests-ntlm all day trying to understand why one works but the other does not.

Error stack for reference:

HTTPErrorTraceback (most recent call last)
<ipython-input-41-af721d5620e7> in <module>()
     31 #Interface via Sharepoint module
     32 s = sharepoint.SharePointSite(site_url, opener)
---> 33 for sp_list in s.lists:
     34     print (sp_list.id, sp_list.meta['Title'])

C:\Anaconda3\lib\site-packages\sharepoint\lists\__init__.py in __iter__(self)
     78 
     79     def __iter__(self):
---> 80         return iter(self.all_lists)
     81 
     82     def __getitem__(self, key):

C:\Anaconda3\lib\site-packages\sharepoint\lists\__init__.py in all_lists(self)
     34         if not hasattr(self, '_all_lists'):
     35             xml = SP.GetListCollection()
---> 36             result = self.opener.post_soap(LIST_WEBSERVICE, xml)
     37 
     38             self._all_lists = []

C:\Anaconda3\lib\site-packages\sharepoint\site.py in post_soap(self, url, xml, soapaction)
     30         if soapaction:
     31             request.add_header('Soapaction', soapaction)
---> 32         response = self.opener.open(request, timeout=self.timeout)
     33         return etree.parse(response).xpath('/soap:Envelope/soap:Body/*', namespaces=namespaces)[0]
     34 

C:\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    470         for processor in self.process_response.get(protocol, []):
    471             meth = getattr(processor, meth_name)
--> 472             response = meth(req, response)
    473 
    474         return response

C:\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
    580         if not (200 <= code < 300):
    581             response = self.parent.error(
--> 582                 'http', request, response, code, msg, hdrs)
    583 
    584         return response

C:\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
    508         if http_err:
    509             args = (dict, 'default', 'http_error_default') + orig_args
--> 510             return self._call_chain(*args)
    511 
    512 # XXX probably also want an abstract factory that knows when it makes

C:\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    442         for handler in handlers:
    443             func = getattr(handler, meth_name)
--> 444             result = func(*args)
    445             if result is not None:
    446                 return result

C:\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
    588 class HTTPDefaultErrorHandler(BaseHandler):
    589     def http_error_default(self, req, fp, code, msg, hdrs):
--> 590         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    591 
    592 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 401: Unauthorized

Best Answer

Shareplum ended up working great, with some exceptions due to the way it imported some data. Here's some rough code I ended up using to test it and later expand, censored, but enough to get someone started if they stumble across this:

from <<DATABASE LIBRARY>> import KEY,ORACLESQL
from requests_ntlm import HttpNtlmAuth
import requests,getpass,datetime,re,json
import shareplum

def date_handler(obj):
    return obj.isoformat() if hasattr(obj, 'isoformat') else obj

def ppJSON(inDict):
    return (json.dumps(inDict,sort_keys=True, indent=4, default=date_handler))


####### BEGIN: Monkey Patch to Shareplum ###################
def _python_type(self, key, value):
    """Returns proper type from the schema"""
    try:
        field_type = self._sp_cols[key]['type']
        if field_type in ['Number', 'Currency']:
            return float(value)
        elif field_type == 'DateTime':
            # Need to round datetime object
            return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')#.date()
        elif field_type == 'Boolean':
            if value == '1':
                return 'Y'
            elif value == '0':
                return 'N'
            else:
                return ''
        elif field_type == 'User':
            # Sometimes the User no longer exists or
            # has a diffrent ID number so we just remove the "123;#"
            # from the beginning of their name
            if value in self.users['sp']:
                return self.users['sp'][value]
            else:
                return value.split('#')[1]
        else:
            return value.encode('ascii','replace').decode('ascii')
    except AttributeError as e:
        print(e)
        return value

#Monkey Patching the datetime import to avoid truncation of data
shareplum.shareplum._List._python_type=_python_type
####### END: Monkey Patch to Shareplum ###################


class SPSite(object):
    def __init__(self,username,password,server,site,listname=None,viewname=None,domain='sensenet'):
        self.domain=domain
        self.server=server
        self.site=site
        self.listname=listname
        self.viewname=viewname
        self.user='{0}\\{1}'.format(self.domain,username).upper()
        self.opener=HttpNtlmAuth(self.user,password)
        self.s=None
        self.l=None
        self.sql=None
        self.updateMetaData()
    def updateMetaData(self):
        self.srvaddr='https://{0}'.format(self.server)
        self.site_url='{0}{1}'.format(self.srvaddr,self.site)
        self.s=shareplum.Site(self.site_url,self.opener)
        if self.listname:
            self.l=self.s.List(self.listname)
    def showSampleData(self,view=None,*args,**kwargs):
        s=self.s
        #theview=view
        #if not theview and not fields: theview=self.viewname
        #Display Data
        print('Lists in {0}'.format(self.site_url))
        for i in s.GetListCollection():
            print(i['Title'])
        if self.listname:
            l=s.List(self.listname)
            print('\nViews in List {0}'.format(self.listname))
            for i in l.views:
                print(i)
            if self.viewname:
                print('\nView Fields for {0}'.format(viewname))
                for i in l.GetView(viewname)['fields']:
                    print (i)
                print('\nData in View {0}'.format(self.viewname))
                for i in l.GetListItems(rowlimit=10,*args,**kwargs):
                    print(ppJSON(i))

Example:

#Configuration
domain='DomainName'
srv="sharepoint.site.com"
creds=KEY(srv,getpass.getuser()) #Secure credential retrieval
site='/sites/path/to/site'
listname='Some List'
viewname='Some View I Want to ETL'

tablename='SP_{0}_STG'.format(to_oracle(listname))

#Show Sample Data
s=SPSite(creds.USERNAME,creds.getpass(),srv,site,listname,viewname)

s.showSampleData(viewname)