Solved

running Go program on Windows 7

Posted on 2014-04-19
1
382 Views
Last Modified: 2014-04-23
i'm new to Go programming.

I am running a Windows 7 desktop pc
I setup my environment variables and path and was able to run the hello world program just fine.

Now, I'm trying the example located at this link:

https://gist.github.com/fastest963/10978299

So this is the code for iocrawler.go:
//before running make sure you setup a GOPATH env variable and ran: "go get code.google.com/p/go.net/html"
 
//to run: go run ioCrawler.go -url="http://developers.google.com/"
//also try http://developer.android.com/index.html
//output goo.gl links to try and redeem will be sent to foundLinks.txt
 
//by the way there's an artificial "rate limit" in func crawler, you can lower that or raise it as you wish
//You can also comment out the onlyGoogleRegex code if you don't want to limit to google.com/youtube
//if you're getting I/O timeout errors, then you might need to increase the timeouts on line 231
 
package main
 
import (
"code.google.com/p/go.net/html"
"errors"
"flag"
"fmt"
"io/ioutil"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"regexp"
"strings"
"time"
"sync"
"math/rand"
)
 
//prevent from downloading files we can't even read
var extToIgnore = regexp.MustCompile(`(?i)\.(gz|tar|tgz|bz2|jpg|png|jpeg|gif|psd|mp3|webm|ogg|mp4|mpeg|flv|avi|wmv|fla|mov|bin|wav|rpm|dmg|exe|zip|rar|7z|doc|ppt|pdf|iso|torrent)$`)
 
//if you want to index more domains feel free to update this or remove it entirely (update validateUri too to remove the check at the bottom)
var onlyGoogleRegex = regexp.MustCompile(`(?i)(google|youtube|android|golang|opers.blogspot|oogle.blogspot|youtu|angularjs|angulardart|dartlang|html5rocks)\.(com|org|net|io|be)`)
 
var googlRegex = regexp.MustCompile(`(?im)((?:goo\.gl/[a-zA-Z0-9]{3,9})|(?:[a-zA-Z0-9]{3,9}(?:/|\\)lg\.oog))`)
 
//if you're getting a lot of I/O (pun not intended...) timeouts then raise this
var defaultConnectTimeout = time.Duration(2*time.Second)
var defaultRWTimeout = time.Duration(5*time.Second)
 
var guessTimeout = time.Duration(1000*time.Millisecond)
var rateLimitedGoog bool
 
var queuedLock = sync.RWMutex{}
var knownLock = sync.RWMutex{}
var knownGooglLock = sync.RWMutex{}
var queuedGooglLock = sync.RWMutex{}
 
var queuedUris = map[string]bool{}
var knownUris = map[string]bool{}
var queuedGoogl = map[string]bool{}
var knownGoogl = map[string]bool{
"goo.gl/ZV63Cr": true,
"goo.gl/1nToQ4": true,
"goo.gl/q7wKp8": true,
"goo.gl/dDZIsO": true,
"goo.gl/R1jVTj": true,
"goo.gl/B92rvj": true,
"goo.gl/Au49Um": true,
"goo.gl/uQvdDQ": true,
"goo.gl/KBXDW9": true,
"goo.gl/m3uwvU": true,
"goo.gl/bhLDq0": true,
"goo.gl/gdEknO": true,
"goo.gl/8AUTRP": true,
"goo.gl/5nz6cg": true,
"goo.gl/T3mzjk": true,
"goo.gl/5FwCJy": true,
"goo.gl/3QGGK0": true,
"goo.gl/K6E7l8": true,
"goo.gl/xZM9jt": true,
"goo.gl/rMFYNX": true,
"goo.gl/F3noUV": true,
"goo.gl/JYSlLQ": true,
"goo.gl/hmgbrW": true,
"goo.gl/0gNnSE": true,
"goo.gl/Tw6kP9": true,
"goo.gl/RB2zhx": true,
"goo.gl/kJPqT3": true,
"goo.gl/qllLsG": true,
"goo.gl/zj9bO0": true,
"goo.gl/bjAPLg": true,
"goo.gl/7tKTyd": true,
"goo.gl/eXeVao": true,
"goo.gl/Dpoyqi": true,
"goo.gl/iztHMd": true,
"goo.gl/HACUad": true,
"goo.gl/oJDnYs": true,
"goo.gl/ia7jLd": true,
"goo.gl/AJRPcI": true,
"goo.gl/bS7lhD": true,
"goo.gl/lyBKa0": true,
"goo.gl/OmM6DN": true,
"goo.gl/puUsyX": true,
"goo.gl/Gc8Dsd": true,
"goo.gl/P3cEUu": true,
"goo.gl/9lT4ZV": true,
"goo.gl/HNA9YG": true,
"goo.gl/xjP4Ke": true,
"goo.gl/ykmzUC": true,
"goo.gl/EOiis9": true,
"goo.gl/wgzU7N": true,
"goo.gl/5ixjt4": true,
"goo.gl/L2N5sY": true,
"goo.gl/SWNo61": true,
"goo.gl/06Hov3": true,
"goo.gl/JOzJ9j": true,
"goo.gl/8JoXn0": true,
"goo.gl/y1y9Lh": true,
"goo.gl/FDTdEx": true,
"goo.gl/LgqaL0": true,
"goo.gl/OQwgds": true,
"goo.gl/hZWUau": true,
"goo.gl/wfkUr7": true,
"goo.gl/sK6kAA": true,
"goo.gl/DpYUwO": true,
"goo.gl/vLppsl": true,
"goo.gl/KVDTVy": true,
"goo.gl/dGrMwY": true,
"goo.gl/YOTeO1": true,
"goo.gl/PltckZ": true,
"goo.gl/sUJ1r7": true,
"goo.gl/okXi4W": true,
"goo.gl/ENWVvN": true,
"goo.gl/BcLOM9": true,
"goo.gl/R9MgF3": true,
"goo.gl/egnhgc": true,
"goo.gl/Jca6KP": true,
"goo.gl/1AiXEk": true,
"goo.gl/iMmkIj": true,
"goo.gl/E1vTJk": true,
"goo.gl/pikTTi": true,
"goo.gl/9KK0k0": true,
"goo.gl/Oof8mG": true,
"goo.gl/X1R54k": true,
"goo.gl/ykH2OP": true,
"goo.gl/cuSgmK": true,
"goo.gl/I6Lpbz": true,
"goo.gl/JFKDGw": true,
"goo.gl/o0YxL2": true,
"goo.gl/cCpQrU": true,
"goo.gl/sWHuKQ": true,
"goo.gl/FhGPJW": true,
"goo.gl/v6jFEL": true,
"goo.gl/hBKyB5": true,
"goo.gl/mH9rXs": true,
"goo.gl/3QXZ9r": true,
"goo.gl/6M31VD": true,
"goo.gl/ZaQQyr": true,
"goo.gl/FbPhW1": true,
"goo.gl/EePxH9": true,
"goo.gl/LLy4UO": true,
"goo.gl/yGQsaV": true,
"goo.gl/MtXamT": true,
"goo.gl/SaKrk1": true,
"goo.gl/Jtc6vr": true,
}
var uriQueue = make(chan *url.URL, 16384)
var googlQueue = make(chan *url.URL, 4096)
 
var googlOutputFile *os.File
var shouldGuess bool
 
func main() {
startURL := flag.String("url", "", "the url to start crawling")
guess := flag.Bool("guess", false, "guess goo.gl links")
flag.Parse()
shouldGuess = *guess
startUri, err := url.Parse(*startURL)
if err != nil || *startURL == "" {
fmt.Printf("Failed to parse starting URL: %s. Response: %s\n", *startURL, err)
return
}
 
googlOutputFile, err = os.OpenFile("./foundLinks.txt", os.O_APPEND | os.O_CREATE | os.O_WRONLY, 0644)
if err != nil {
fmt.Printf("Failed to open foundLinks.txt. Error: %s\n", err)
return
}
defer func() {
if err := googlOutputFile.Close(); err != nil {
fmt.Printf("Failed to close output file: %s\n", err)
}
}()
 
for i := 0; i < 48; i++ {
go crawler()
}
for i := 0; i < 8; i++ {
go googlChecker()
}
for i := 0; i < 1; i++ {
go guesser()
}
//fakeGoogl, _ := url.Parse("http://goo.gl/ZV63Cr")
//googlQueue <- fakeGoogl
uriQueue <- startUri
waitForSignal()
}
 
func crawler() {
var nextUri *url.URL
for {
<-time.After(500*time.Millisecond)
select {
case nextUri = <-uriQueue:
crawl(nextUri)
}
}
}
 
func googlChecker() {
var nextUri *url.URL
for {
if rateLimitedGoog {
fmt.Println("RATE LIMITED!! Waiting...")
<-time.After(120*time.Second)
}
select {
case nextUri = <-googlQueue:
verifyGoogl(nextUri)
}
}
}
 
func normalizeUri(uri *url.URL) string {
host := uri.Host
//remove www from the host
if len(host) > 4 && host[0:4] == "www." {
host = host[4:]
}
path := uri.Path
if path == "" || path == "/" {
return host
}
pathLen := len(path)
//remove any trailing slashes
if path[pathLen-1] == '/' {
path = path[:pathLen-1]
}
//unfortunately including RawQuery in here to fix youtube links not working anymore
return strings.Join([]string{host, path, uri.RawQuery}, "")
}
 
 
func validateUri(uri *url.URL) (bool, string) {
if uri.Scheme != "http" && uri.Scheme != "https" {
return false, ""
}
normalUri := normalizeUri(uri)
knownLock.RLock()
if _, ok := knownUris[normalUri]; ok {
knownLock.RUnlock()
//already checked this
return false, normalUri
}
knownLock.RUnlock()
if uri.Host == "" {
return false, normalUri
}
if extToIgnore.MatchString(uri.Path) {
return false, normalUri
}
if !onlyGoogleRegex.MatchString(uri.Host) {
return false, normalUri
}
return true, normalUri
}
 
//from http://stackoverflow.com/questions/16895294/how-to-set-timeout-for-http-get-requests-in-golang
func TimeoutDialer(cTimeout time.Duration, rwTimeout time.Duration) func(net, addr string) (c net.Conn, err error) {
return func(netw, addr string) (net.Conn, error) {
conn, err := net.DialTimeout(netw, addr, cTimeout)
if err != nil {
return nil, err
}
conn.SetDeadline(time.Now().Add(rwTimeout))
return conn, nil
}
}
func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration, baseNormalUri string) *http.Client {
return &http.Client{
Transport: &http.Transport{
Dial: TimeoutDialer(connectTimeout, readWriteTimeout),
},
CheckRedirect: func(req *http.Request, via []*http.Request) error {
valid, newNormalUri := validateUri(req.URL)
//allow redirects to the *same* url since we might already have this url in knownUris because we're processing it right now
if !valid && baseNormalUri != newNormalUri {
return errors.New("Redirect:Invalid")
}
return nil
},
}
}
 
func crawl(uri *url.URL) {
valid, normalUri := validateUri(uri)
if !valid || uri == nil {
queuedLock.Lock()
delete(queuedUris, normalUri)
queuedLock.Unlock()
return
}
knownLock.Lock()
knownUris[normalUri] = true
knownLock.Unlock()
 
queuedLock.Lock()
delete(queuedUris, normalUri)
queuedLock.Unlock()
 
findYoutubeAnnotations(uri)
 
urlStr := uri.String()
fmt.Printf("Fetching: %s\n", urlStr)
client := NewTimeoutClient(defaultConnectTimeout, defaultRWTimeout, normalUri)
resp, err := client.Get(urlStr)
if err != nil {
var errMsg string
urlErr, ok := err.(*url.Error)
if !ok {
errMsg = err.Error()
} else {
errMsg = urlErr.Err.Error()
}
 
if errMsg != "Redirect:Invalid" {
fmt.Printf("Failed to Get URL: %s. Response: %s\n", urlStr, errMsg)
}
knownLock.Lock()
delete(knownUris, normalUri)
knownLock.Unlock()
return
}
htmlBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Printf("Failed to read from server to get full HTML for %s. Error: %s\n", urlStr, err)
knownLock.Lock()
delete(knownUris, normalUri)
knownLock.Unlock()
return
}
resp.Body.Close()
htmlStr := string(htmlBytes)
locateAndSaveGoogl(htmlStr)
 
doc, err := html.Parse(strings.NewReader(htmlStr))
if err != nil {
fmt.Printf("Failed to parse HTML for %s. Error: %s\n", urlStr, err)
return
}
findAnchors(doc, uri)
}
 
func findAnchors(node *html.Node, baseUri *url.URL) {
if node.Type == html.ElementNode && (node.Data == "a" || node.Data == "url") {
for _, attr := range node.Attr {
if (node.Data == "a" && attr.Key == "href") || (node.Data == "url" && attr.Key == "value") {
newUri, err := url.Parse(attr.Val)
if err != nil || newUri == nil {
fmt.Printf("Invalid url detected: %s. Error %s\n", attr.Val, err)
break
}
if !newUri.IsAbs() {
newUri = baseUri.ResolveReference(newUri)
}
valid, newNorm := validateUri(newUri)
if valid {
//fmt.Printf("Found new URL! %s\n", newUri.String())
queuedLock.Lock()
if _, ok := queuedUris[newNorm]; !ok {
queuedUris[newNorm] = true
queuedLock.Unlock()
 
select {
case uriQueue <- newUri:
case <-time.After(time.Second * 1):
//fmt.Println("Queue is full!!")
return
}
} else {
queuedLock.Unlock()
}
}
break
}
}
} else if node.Type == html.ElementNode && node.Data == "head" {
//skip looping through head
node = node.NextSibling
if node == nil {
return
}
}
for nextNode := node.FirstChild; nextNode != nil; nextNode = nextNode.NextSibling {
findAnchors(nextNode, baseUri)
}
}
 
func findYoutubeAnnotations(uri *url.URL) {
if (uri.Host == "youtube.com" || uri.Host == "www.youtube.com") {
if len(uri.Path) < 6 || uri.Path[0:6] != "/watch" {
return
}
queryVals := uri.Query();
if queryVals == nil {
return
}
videoIDs, ok := queryVals["v"]
if !ok || len(videoIDs) < 1 {
videoIDsCommas, ok := queryVals["video_ids"]
if !ok || len(videoIDsCommas) < 1 {
return
}
videoIDs = strings.Split(videoIDsCommas[0], ",")
}
for _, videoID := range videoIDs {
addYoutubeAnnotations(videoID)
}
} else if (uri.Host == "youtu.be") {
addYoutubeAnnotations(uri.Path);
} else {
return
}
}
 
//thanks https://plus.google.com/+MarcLesterTan
func addYoutubeAnnotations(videoID string) {
if len(videoID) < 5 {
return
}
newUri, err := url.Parse("https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=" + videoID)
if err != nil || newUri == nil {
fmt.Printf("Invalid youtube annotations url generated. Error %s\n", err)
return
}
valid, newNorm := validateUri(newUri)
if valid {
//fmt.Printf("Added new youtube annotations URL! %s\n", newUri.String())
queuedLock.Lock()
if _, ok := queuedUris[newNorm]; !ok {
queuedUris[newNorm] = true
queuedLock.Unlock()
 
select {
case uriQueue <- newUri:
case <-time.After(time.Second * 1):
//fmt.Println("Queue is full!!")
return
}
} else {
queuedLock.Unlock()
}
}
}
 
func ReverseString(str string) string {
i := len(str)
final := make([]rune, i)
for _, r := range str {
i--
final[i] = r
}
return string(final)
}
 
func locateAndSaveGoogl(htmlStr string) {
matches := googlRegex.FindAllString(htmlStr, -1)
if matches == nil {
return
}
for _, match := range matches {
//detect backwards goo.gl links
if len(match) > 6 && match[len(match)-6:] == "lg.oog" {
match = ReverseString(match)
}
//fmt.Printf("Found goo.gl link!: %s\n", match)
//prevent backwards slashes from breaking this
uri, err := url.Parse("http://" + strings.Replace(match, `\`, "/", -1))
if err != nil {
fmt.Printf("Failed to parse goo.gl link: %s. Error: %s\n", match, err)
continue
}
normalUri := normalizeUri(uri)
knownGooglLock.RLock()
if _, ok := knownGoogl[normalUri]; ok {
knownGooglLock.RUnlock()
continue
}
knownGooglLock.RUnlock()
 
queuedGooglLock.Lock()
if _, ok := queuedGoogl[normalUri]; ok {
queuedGooglLock.Unlock()
continue
}
queuedGoogl[normalUri] = true
queuedGooglLock.Unlock()
 
googlQueue <- uri
}
}
 
var validGooglChars = "avE1k4yiQhAzl9B6DpIRdCqnF8tbSr7J2NKGT3VmoMPuc5UHeLjOZWxfwXgsY0"
 
func guesser() { //s float64
if !shouldGuess {
return
}
 
var i int
n := make([]uint8, 6)
for {
if rateLimitedGoog {
<-time.After(180*time.Second)
continue
}
 
i = 0
for i < 6 {
n[i] = validGooglChars[int(rand.Float64() * 62)]
i++
}
normal := "goo.gl/" + string(n)
knownGooglLock.RLock()
if _, ok := knownGoogl[normal]; ok {
knownGooglLock.RUnlock()
continue
}
knownGooglLock.RUnlock()
 
queuedGooglLock.Lock()
if _, ok := queuedGoogl[normal]; ok {
queuedGooglLock.Unlock()
continue
}
queuedGoogl[normal] = true
queuedGooglLock.Unlock()
 
uri, err := url.Parse("http://" + normal)
if err != nil {
fmt.Printf("Failed to generate valid goo.gl link: %s. Error: %s\n", normal, err)
continue
}
if guessTimeout > 0 {
<-time.After(guessTimeout)
}
googlQueue <- uri
}
}
 
var googlClient = &http.Client{
CheckRedirect: func(req *http.Request, via []*http.Request) error {
// /events/io/2014/redeem
//yes... this is ghetto how it works
if req.URL.Host == "developers.google.com" && len(req.URL.Path) > 22 && req.URL.Path[0:22] == "/events/io/2014/redeem" {
return errors.New("Redirect:Valid")
}
return errors.New("Redirect:Invalid")
},
}
 
func verifyGoogl(uri *url.URL) {
normalUri := normalizeUri(uri)
 
queuedGooglLock.Lock()
delete(queuedGoogl, normalUri)
queuedGooglLock.Unlock()
 
knownGooglLock.Lock()
if _, ok := knownGoogl[normalUri]; ok {
knownGooglLock.Unlock()
return
}
knownGoogl[normalUri] = true
knownGooglLock.Unlock()
 
urlStr := uri.String()
fmt.Println("Fetching goo.gl link: " + urlStr)
resp, err := googlClient.Head(urlStr)
if err != nil {
var errMsg string
urlErr, ok := err.(*url.Error)
if !ok {
errMsg = err.Error()
} else {
errMsg = urlErr.Err.Error()
}
 
if errMsg == "Redirect:Valid" {
rateLimitedGoog = false
fmt.Printf("Found a valid goo.gl redirect!! %s\n", urlStr)
 
if _, err := googlOutputFile.WriteString(urlStr + "\n"); err != nil {
fmt.Printf("ERROR! Failed to write goo.gl redirect to output file! Error: %s/n", err)
}
} else if errMsg == "Redirect:Invalid" {
rateLimitedGoog = false
//do nothing
} else {
fmt.Printf("Failed to Head URL: %s. Response: %s\n", urlStr, errMsg)
}
}
if resp.StatusCode == 403 {
rateLimitedGoog = true
} else {
rateLimitedGoog = false
}
}
 
func waitForSignal() {
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt)
<-c
fmt.Println("Received SIGINT!")
}

Open in new window


So when i enter the command at my dos command prompt:  go build
I get this message.

error message on build
Anyone know why I'm getting this message or what am i doing wrong to run this program?
0
Comment
Question by:maqskywalker
1 Comment
 
LVL 14

Accepted Solution

by:
brendanmeyer earned 500 total points
ID: 40011809
have you run the "go get code.google.com/p/go.net/html" command to get the go.net/html package?
0

Featured Post

Free Tool: SSL Checker

Scans your site and returns information about your SSL implementation and certificate. Helpful for debugging and validating your SSL configuration.

One of a set of tools we are providing to everyone as a way of saying thank you for being a part of the community.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

Suggested Solutions

Title # Comments Views Activity
Modify a small python script 19 116
Programatically extract date from website 8 79
MacOS and programming in React 7 41
Removing line numbers from left column in config file 7 61
This is about my first experience with programming Arduino.
Although it can be difficult to imagine, someday your child will have a career of his or her own. He or she will likely start a family, buy a home and start having their own children. So, while being a kid is still extremely important, it’s also …
The viewer will learn how to use the return statement in functions in C++. The video will also teach the user how to pass data to a function and have the function return data back for further processing.
With the power of JIRA, there's an unlimited number of ways you can customize it, use it and benefit from it. With that in mind, there's bound to be things that I wasn't able to cover in this course. With this summary we'll look at some places to go…

856 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question