Docshund provides two API calls to automate extracting data from PDF tax forms. One API call extracts the data in the output format you prefer, and the other returns the remaining page-credit balance.
To extract data from a PDF tax form in Excel, CSV, XML, or JSON format using the API, send an HTTP POST request with multipart/form-data to
https://docshund.com/api1
For example, using curl you can run the following command:
curl https://docshund.com/api1 -F "op=26" -F "key=API_KEY_GIVEN_TO_YOU" \
-F "type=PDF" -F "fmt=CSV" -F "file=@your_tax_file.pdf"
In the above command, replace API_KEY_GIVEN_TO_YOU with the actual API key that was assigned to you when you created your Docshund account (it is displayed in the profile tab once you have activated your account),
and replace your_tax_file.pdf with the name of your actual tax PDF file. The output format (specified with the fmt parameter) can be one of the following:
CSV = comma-separated values; a blank line is inserted between the data for each page
XLSX-S = Excel format; the data for all pages is in one single Excel sheet, with a blank row between pages
XLSX-M = Excel format; a separate Excel sheet for each PDF page
XML = XML format
JSON = JSON format
To see the remaining credit balance, send an HTTP POST request with multipart/form-data to
https://docshund.com/api1
For example, using curl you can run the following command:
curl https://docshund.com/api1 -F "op=25" -F "key=API_KEY_GIVEN_TO_YOU"
Example response:
{"page_credits":"6997"}
Given below are examples of how the API can be utilised in different programming languages:
import requests
import json
def parsePdfForm(url, api_key, file_pdf, rspfmt):
    """Upload a PDF tax form and return the extracted data.

    Sends a multipart/form-data POST with ``op=26`` to ``url``.

    Args:
        url: API endpoint to post to.
        api_key: API key, sent in the ``key`` field.
        file_pdf: Path of the PDF file to upload.
        rspfmt: Requested output format, sent in the ``fmt`` field
            (for example ``"CSV"``).

    Returns:
        ``(response.text, response.status_code)`` when the request completes,
        or ``(None, 400)`` when the request itself fails.

    Raises:
        OSError: If ``file_pdf`` cannot be opened.
    """
    # The context manager closes the upload handle on every path, including
    # a failed request; previously the handle was never closed.
    with open(file_pdf, 'rb') as pdf:
        # A (None, value) tuple makes requests send the entry as a plain
        # form field inside the multipart body instead of as a file part.
        files_data = {
            "key": (None, api_key),
            "op": (None, "26"),
            "fmt": (None, rspfmt),
            "type": (None, "PDF"),
            "file": ("file", pdf),
        }
        try:
            # Make the POST request
            response = requests.post(url, files=files_data)
            return response.text, response.status_code
        except requests.RequestException:
            # Only request failures are converted; KeyboardInterrupt and
            # SystemExit now propagate. The 400 is the placeholder status
            # existing callers already receive, not one sent by the server.
            return None, 400
def getRemainingCredit(url, api_key):
    """Query the remaining page-credit balance.

    Sends a multipart/form-data POST with ``op=25`` to ``url``.

    Args:
        url: API endpoint to post to.
        api_key: API key, sent in the ``key`` field.

    Returns:
        ``(response.text, response.status_code)`` when the request completes,
        or ``(None, 400)`` when the request itself fails.
    """
    # A (None, value) tuple makes requests send the entry as a plain form
    # field inside a multipart body, which is the encoding the API expects.
    files_data = {
        "key": (None, api_key),
        "op": (None, "25"),
    }
    try:
        # Make the POST request
        response = requests.post(url, files=files_data)
        return response.text, response.status_code
    except requests.RequestException:
        # Only request failures are converted; KeyboardInterrupt and
        # SystemExit now propagate. The 400 is the placeholder status
        # existing callers already receive, not one sent by the server.
        return None, 400
if __name__ == "__main__":
    # Settings to adapt before running:
    #   pdf_file - the file you want to parse
    #   api_key  - the key assigned to you (see the profile page)
    #   rspfmt   - the format you prefer, like XLSX-S , XLSX-M , XML , JSON etc
    url = "https://docshund.com/api1"
    api_key = "0a893201858944b9acfc492cfd5adc09952f89dbaecb3a49"
    pdf_file = "/home/tst/f1040_2024_filled.pdf"
    rspfmt = "CSV"

    # Each call is wrapped in a lambda so it only runs when its turn comes:
    # the credit query is sent and reported before the upload starts.
    calls = (
        lambda: getRemainingCredit(url, api_key),
        lambda: parsePdfForm(url, api_key, pdf_file, rspfmt),
    )
    for call in calls:
        rsp_body, status = call()
        print("Status:", status)
        print("Response:", rsp_body)
import axios from 'axios';
import FormData from 'form-data';
import fs from 'fs';
/**
 * Uploads a PDF tax form as a multipart/form-data POST (op=26) and logs the
 * HTTP status and the response body.
 *
 * Failures are logged, not thrown, so the returned promise always resolves.
 *
 * @param {string} url      API endpoint to post to.
 * @param {string} api_key  API key, sent in the `key` field.
 * @param {string} file_pdf Path of the PDF file to upload.
 * @param {string} rspfmt   Requested output format, sent in the `fmt` field.
 * @returns {Promise<void>}
 */
async function parsePdfForm(url, api_key, file_pdf, rspfmt) {
  const formData = new FormData();

  // Plain form fields
  formData.append('key', api_key);
  formData.append('op', '26');
  formData.append('fmt', rspfmt);
  formData.append('type', 'PDF');

  // The PDF itself, streamed from disk
  formData.append('file', fs.createReadStream(file_pdf));

  try {
    const response = await axios.post(url, formData, {
      // getHeaders() carries the multipart boundary the server needs.
      headers: {
        ...formData.getHeaders(),
      },
      maxBodyLength: Infinity, // For large files
    });
    console.log('Status:', response.status);
    console.log('Response:', response.data);
  } catch (error) {
    console.error('Error:', error.message);
    // axios rejects on a non-2xx reply; error.response then holds the
    // server's status and body, which explain why the request was refused.
    if (error.response) {
      console.error('Status:', error.response.status);
      console.error('Response:', error.response.data);
    }
  }
}
/**
 * Queries the remaining page-credit balance with a multipart/form-data POST
 * (op=25) and logs the HTTP status and the response body.
 *
 * Failures are logged, not thrown, so the returned promise always resolves.
 *
 * @param {string} url     API endpoint to post to.
 * @param {string} api_key API key, sent in the `key` field.
 * @returns {Promise<void>}
 */
async function getRemainingCredit(url, api_key) {
  const formData = new FormData();
  formData.append('key', api_key);
  formData.append('op', '25');

  try {
    // Only two short fields are sent, so no body-size override is needed.
    const response = await axios.post(url, formData, {
      // getHeaders() carries the multipart boundary the server needs.
      headers: {
        ...formData.getHeaders(),
      },
    });
    console.log('Status:', response.status);
    console.log('Response:', response.data);
  } catch (error) {
    console.error('Error:', error.message);
    // axios rejects on a non-2xx reply; error.response then holds the
    // server's status and body, which explain why the request was refused.
    if (error.response) {
      console.error('Status:', error.response.status);
      console.error('Response:', error.response.data);
    }
  }
}
// Settings to adapt before running:
//   pdf_file - the file you want to parse
//   api_key  - the key assigned to you (see the profile page)
//   rspfmt   - the format you prefer, like XLSX-S , XLSX-M , XML , JSON etc
const url = 'https://docshund.com/api1';
const api_key = "0a2050c185889ad2acfc492cfd5adc09952f89dbaecb3a49";
const rspfmt = "CSV";
const pdf_file = "/home/tst/f1040_2024_filled.pdf";

// Run the calls one after the other. Both log under the same "Status:" and
// "Response:" labels, so firing them concurrently would interleave the output
// and make it impossible to tell which reply belongs to which request.
getRemainingCredit(url, api_key)
  .then(() => parsePdfForm(url, api_key, pdf_file, rspfmt));
package main
import (
"bytes"
"fmt"
"io"
"mime/multipart"
"net/http"
"os"
)
// main runs the two example calls in order, first the remaining-credit query
// and then the PDF extraction, printing the status code and body of each.
func main() {
	// Settings to adapt before running:
	//   pdfFile - the file you want to parse
	//   apiKey  - the key assigned to you (see the profile page)
	//   rspFmt  - the format you prefer, like XLSX-S , XLSX-M , XML , JSON etc
	const (
		endpoint = "https://docshund.com/api1"
		apiKey   = "0a2053c1857944d9acfc492cfd50dc09952f89dbaecb3a49"
		rspFmt   = "CSV"
		pdfFile  = "/home/tst/f1040_2024_filled.pdf"
	)

	// report prints one (body, status) result pair; %s renders the raw bytes
	// as text.
	report := func(body []byte, status int) {
		fmt.Printf("Status: %d\n", status)
		fmt.Printf("Response: %s\n", body)
	}

	report(getRemainingCredit(endpoint, apiKey))
	report(parsePdfForm(endpoint, apiKey, pdfFile, rspFmt))
}
// parsePdfForm uploads the PDF at pdf_file to url as a multipart/form-data
// POST (op=26) asking for output format rspfmt. It returns the raw response
// body and the HTTP status code.
//
// The function has no error result: it panics if the form cannot be built,
// the request fails, or the response body cannot be read.
//
// NOTE: http.Post goes through http.DefaultClient, which sets no timeout, so
// a stalled server blocks this call indefinitely.
func parsePdfForm(url string, api_key string, pdf_file string, rspfmt string) ([]byte, int) {
	// Build the multipart form in memory.
	var requestBody bytes.Buffer
	writer := multipart.NewWriter(&requestBody)

	// A slice (not a map) keeps the fields in a fixed order on the wire.
	fields := [][2]string{
		{"key", api_key},
		{"op", "26"},
		{"fmt", rspfmt},
		{"type", "PDF"},
	}
	for _, field := range fields {
		if err := writer.WriteField(field[0], field[1]); err != nil {
			panic(err)
		}
	}

	// Attach the PDF itself.
	addFileToWriter(writer, "file", pdf_file)

	// Close writes the terminating boundary; if it fails the body is
	// incomplete and must not be sent.
	if err := writer.Close(); err != nil {
		panic(err)
	}

	// The content type carries the boundary the server needs to split parts.
	resp, err := http.Post(url, writer.FormDataContentType(), &requestBody)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	return body, resp.StatusCode
}
func getRemainingCredit(url string, api_key string) ([] byte,int) {
// Create a buffer for the multipart form
var requestBody bytes.Buffer
writer := multipart.NewWriter(&requestBody)
// Add form fields
_ = writer.WriteField("key", api_key)
_ = writer.WriteField("op", "25")
// Close the writer to finalize the multipart form
writer.Close()
// Create the request
req, err := http.NewRequest("POST", url, &requestBody)
if err != nil {
panic(err)
}
// Set the content type with boundary
req.Header.Set("Content-Type", writer.FormDataContentType())
// Execute the request
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
// Read response
body, err := io.ReadAll(resp.Body)
if err != nil {
panic(err)
}
return body,resp.StatusCode
}
func addFileToWriter(writer *multipart.Writer, fieldName, filename string) {
file, err := os.Open(filename)
if err != nil {
panic(err)
}
defer file.Close()
part, err := writer.CreateFormFile(fieldName, filename)
if err != nil {
panic(err)
}
_, err = io.Copy(part, file)
if err != nil {
panic(err)
}
}