Python Project With Source Code - Website Summarization API
Website Summarization API
This project builds a machine learning model that summarises a website
given its URL.
Getting Started
These instructions will get you a copy of the project up and running on your
local machine for development and testing purposes.
Prerequisites
Python distribution
```
Anaconda
```
Installing
Install Anaconda python distribution on your system
Create a virtual environment called app.
```
python -m venv app
```
Activate the virtual environment
```
LINUX/Mac: source app/bin/activate
Windows: app\Scripts\activate
```
Upgrade to the latest pip
```
pip install --upgrade pip
```
Install dependencies using requirements file
```
pip install -r requirements.txt
```
**Note: Your virtual environment must always be activated before running
any command**
Deployment
Start the app (make sure to enter a valid URL of an existing website)
Example of valid commands
```
python app.py simple --url https://facebook.com --sentence 1 --language english
python app.py simple --url https://facebook.com
python app.py simple --url https://korapay.com
python app.py bulk --path ./csv/valid_websites.csv
```
APIs
These are the command options in full:
```
A command line utility for website Summarization.
-----------------------------------------------
These are common commands for this app.
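positional arguments:
  action               'simple' (summarise one URL) or 'bulk' (summarise a CSV of websites)
optional arguments:
  -h, --help           show this help message and exit
  --url URL            A link to the website url
  --sentence SENTENCE  Argument to define number of sentence for the summary
  --language LANGUAGE  Argument to define language of the summary
  --path PATH          path to csv file
```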
Requirements:
utils==1.0.1
sumeval==0.2.2
tensorflow==2.3.0
wget==3.2
sumy==0.8.1
model==0.6.0
numpy==1.19.1
newspaper==0.1.0.7
nltk==3.5
gensim==3.8.3
Source Code:
#!/usr/bin/python
from utils.summarize import summarize
import csv
import shutil
import os
import textwrap
import logging
import argparse
import sys
def parse_args(argv):
    """Parse the command-line arguments of the summarization utility.

    Args:
        argv: Full argument vector including the program name at index 0
            (same layout as ``sys.argv``); the first element is skipped.

    Returns:
        argparse.Namespace with the attributes ``action``, ``url``,
        ``sentence`` (int, default 2), ``language`` (default 'English')
        and ``path``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            A command line utility for website summarization.
            -----------------------------------------------
            These are common commands for this app.'''))
    # The help text names the two actions the entry point actually handles;
    # the value itself is still validated by the caller, not by argparse.
    parser.add_argument(
        'action',
        help="Action to run: 'simple' summarizes a single --url, "
             "'bulk' summarizes every website listed in the csv at --path")
    parser.add_argument(
        '--url',
        help='A link to the website url')
    parser.add_argument(
        '--sentence',
        help='Argument to define number of sentence for the summary',
        type=int,
        default=2)
    parser.add_argument(
        '--language',
        help='Argument to define language of the summary',
        default='English')
    parser.add_argument(
        '--path',
        help='path to csv file')
    return parser.parse_args(argv[1:])
def readCsv(path):
    """Read every row of the CSV file at ``path``.

    The file is first decoded with the platform's default text encoding.
    If that raises a decoding error, the file is re-read from the start
    with a fallback single-byte/ANSI encoding, so rows parsed before the
    failure are never duplicated in the result.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list of rows, each row being a list of strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the fallback encoding cannot decode it either.
    """
    print('\n\n Processing Csv file \n\n')
    sys.stdout.flush()
    # The "mbcs" codec is only registered on Windows; elsewhere fall back to
    # latin-1, which can decode any byte sequence.
    fallback = 'mbcs' if sys.platform == 'win32' else 'latin-1'
    try:
        # newline='' lets the csv module handle line endings itself.
        with open(path, 'r', newline='') as userFile:
            return list(csv.reader(userFile))
    except UnicodeDecodeError:
        with open(path, 'r', encoding=fallback, newline='') as userFile:
            return list(csv.reader(userFile))
def writeCsv(data, LANGUAGE, SENTENCES_COUNT):
    """Write ``data`` plus a generated "summary" column to beneficiary.csv.

    The first row of ``data`` is the header and must contain a 'website'
    column. For every following row, the value in that column is passed to
    ``summarize`` and the result is appended to the row. Rows for which
    summarizing or writing fails are reported and left out of the output.
    The input rows are not modified.

    Args:
        data: List of rows (lists of strings); ``data[0]`` is the header.
        LANGUAGE: Language argument forwarded to ``summarize``.
        SENTENCES_COUNT: Sentence-count argument forwarded to ``summarize``.

    Raises:
        IndexError: If ``data`` is empty.
        ValueError: If the header has no 'website' column.
    """
    print('\n\n Updating Csv file \n\n')
    sys.stdout.flush()
    # Validate the header BEFORE creating the output file, so that bad input
    # never leaves an empty beneficiary.csv behind for the caller to pick up.
    header = data[0]
    position = header.index('website')
    # newline='' stops the csv module from emitting blank lines on Windows.
    with open('beneficiary.csv', 'w', newline='') as newFile:
        newFileWriter = csv.writer(newFile)
        # Always emit the header, even when there are no data rows.
        newFileWriter.writerow([*header, 'summary'])
        for row in data[1:]:
            try:
                summary = summarize(row[position], LANGUAGE, SENTENCES_COUNT)
                newFileWriter.writerow([*row, summary])
            except Exception:
                # Best effort: record the cause and carry on with the rest.
                logging.exception('Could not summarize row %r', row)
                print('\n\n Error Skipping line \n\n')
                sys.stdout.flush()
def processCsv(path, LANGUAGE, SENTENCES_COUNT):
    """Read the CSV at ``path`` and write its summarized copy.

    Calls ``readCsv`` then ``writeCsv``. Any ordinary failure is logged with
    its traceback and reported on stdout rather than raised, so the caller
    always regains control.

    Args:
        path: Path of the input CSV file.
        LANGUAGE: Language argument forwarded to ``writeCsv``.
        SENTENCES_COUNT: Sentence-count argument forwarded to ``writeCsv``.
    """
    print('\n\n Processing Started \n\n')
    sys.stdout.flush()
    try:
        data = readCsv(path)
        writeCsv(data, LANGUAGE, SENTENCES_COUNT)
    except Exception:
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        logging.exception('Failed to process csv file %r', path)
        print('\n\n Invalid file in file path \n\n')
        sys.stdout.flush()
def main(argv=None):
    """Entry point: summarize one URL ('simple') or a CSV of websites ('bulk').

    Args:
        argv: Full argument vector including the program name; defaults to
            the current ``sys.argv`` when omitted.

    Returns:
        In 'bulk' mode, the value returned by ``shutil.move`` when
        beneficiary.csv exists and is moved over ``--path``; otherwise None.
    """
    if argv is None:
        # Resolved at call time rather than at definition time.
        argv = sys.argv

    # Configure logging (applog.log is overwritten on every run).
    logging.basicConfig(filename='applog.log',
                        filemode='w',
                        level=logging.INFO,
                        format='%(levelname)s:%(message)s')
    args = parse_args(argv)
    action = args.action
    url = args.url
    path = args.path
    LANGUAGE = "english" if args.language is None else args.language
    SENTENCES_COUNT = 2 if args.sentence is None else args.sentence

    if action == 'bulk':
        if path is None:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()
            return
        # guard against errors
        try:
            processCsv(path, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            logging.exception('Bulk summarization failed for %r', path)
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()
        print('Completed')
        sys.stdout.flush()
        # The summarized copy replaces the input file in place.
        if os.path.isfile('beneficiary.csv'):
            return shutil.move('beneficiary.csv', path)
        return

    if action == 'simple':
        # Mirror the bulk-mode check: a missing --url is a usage error.
        if url is None:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid web link \n\n')
            sys.stdout.flush()
            return
        # guard against errors
        try:
            # NOTE(review): the return value is discarded here; presumably
            # summarize() reports the summary itself - confirm.
            summarize(url, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            logging.exception('Summarization failed for %r', url)
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid web link \n\n')
            sys.stdout.flush()
        print('Completed')
        sys.stdout.flush()
    else:
        print(
            '\nAction command is not supported\n for help: run python3 app.py -h'
        )
        sys.stdout.flush()
    return
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
0 Comments